From 1b0eadba24dc12b40187f75d91bfaba4dfd4a082 Mon Sep 17 00:00:00 2001 From: Preston Hartzell Date: Mon, 1 May 2023 07:37:47 -0400 Subject: [PATCH] Update Sentinel-5P (#167) --------- Co-authored-by: Pete Gadomski Co-authored-by: Tom Augspurger --- datasets/io-land-cover/dataset.yaml | 2 +- datasets/ms-buildings/Dockerfile | 74 ++++++ datasets/sentinel-5p/Dockerfile | 74 ++++++ datasets/sentinel-5p/README.md | 12 +- .../sentinel-5p/collection/description.md | 19 ++ datasets/sentinel-5p/collection/template.json | 247 ++++++++++++++++++ datasets/sentinel-5p/dataset.yaml | 52 +++- datasets/sentinel-5p/requirements.txt | 1 + datasets/sentinel-5p/s5/__init__.py | 0 datasets/sentinel-5p/sentinel-5p-l2.json | 132 ---------- datasets/sentinel-5p/sentinel_5p.py | 198 ++++++++++++++ datasets/sentinel-5p/summarize-wf.yaml | 71 ----- 12 files changed, 666 insertions(+), 216 deletions(-) create mode 100644 datasets/ms-buildings/Dockerfile create mode 100644 datasets/sentinel-5p/Dockerfile create mode 100644 datasets/sentinel-5p/collection/description.md create mode 100755 datasets/sentinel-5p/collection/template.json create mode 100644 datasets/sentinel-5p/requirements.txt delete mode 100644 datasets/sentinel-5p/s5/__init__.py delete mode 100755 datasets/sentinel-5p/sentinel-5p-l2.json create mode 100644 datasets/sentinel-5p/sentinel_5p.py delete mode 100644 datasets/sentinel-5p/summarize-wf.yaml diff --git a/datasets/io-land-cover/dataset.yaml b/datasets/io-land-cover/dataset.yaml index b7f59ca3..1bdde809 100644 --- a/datasets/io-land-cover/dataset.yaml +++ b/datasets/io-land-cover/dataset.yaml @@ -2,7 +2,7 @@ id: io_lulc image: ${{ args.registry }}/pctasks-task-base:latest args: -- registry + - registry code: src: ${{ local.path(./io_lulc.py) }}
diff --git a/datasets/ms-buildings/Dockerfile b/datasets/ms-buildings/Dockerfile
new file mode 100644
index 00000000..1cb83935
--- /dev/null
+++ b/datasets/ms-buildings/Dockerfile
@@ -0,0 +1,77 @@
+FROM ubuntu:20.04
+
+# Setup timezone info
+ENV TZ=UTC
+
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Install OS packages in a single layer and clean the apt cache and package
+# lists in that same layer; removing them in a later layer would not shrink
+# the image.
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository ppa:ubuntugis/ppa && \
+    apt-get update && \
+    apt-get install -y build-essential python3-dev python3-pip \
+        jq unzip ca-certificates wget curl git && \
+    apt-get autoremove -y && apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
+
+# See https://github.com/mapbox/rasterio/issues/1289
+ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+
+# Install Python 3.11
+# Miniforge3 includes mamba; the separate Mambaforge installer is deprecated.
+# curl -f fails on an HTTP error instead of saving the error page as the script.
+RUN curl -fL -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
+    && bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
+    && rm -rf "Miniforge3-$(uname)-$(uname -m).sh"
+
+ENV PATH=/opt/conda/bin:$PATH
+# Append the previous LD_LIBRARY_PATH only when it is set, so the value never
+# ends in an empty entry (which the loader treats as the current directory).
+ENV LD_LIBRARY_PATH=/opt/conda/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+
+RUN mamba install -y -c conda-forge python=3.11 gdal pip setuptools cython numpy
+
+RUN python -m pip install --upgrade pip
+
+# Install common packages
+COPY requirements-task-base.txt /tmp/requirements.txt
+RUN python -m pip install --no-build-isolation -r /tmp/requirements.txt
+
+#
+# Copy and install packages
+#
+
+COPY pctasks/core /opt/src/pctasks/core
+RUN pip install /opt/src/pctasks/core
+
+COPY pctasks/cli /opt/src/pctasks/cli
+RUN pip install /opt/src/pctasks/cli
+
+COPY pctasks/task /opt/src/pctasks/task
+RUN pip install /opt/src/pctasks/task
+
+COPY pctasks/client /opt/src/pctasks/client
+RUN pip install /opt/src/pctasks/client
+
+COPY pctasks/ingest /opt/src/pctasks/ingest
+RUN pip install /opt/src/pctasks/ingest
+
+COPY pctasks/dataset /opt/src/pctasks/dataset
+RUN pip install /opt/src/pctasks/dataset
+
+COPY ./datasets/ms-buildings/requirements.txt /opt/src/datasets/ms-buildings/requirements.txt
+RUN python3 -m pip install -r /opt/src/datasets/ms-buildings/requirements.txt
+
+# Setup Python Path to allow import of test modules
+# (append the previous PYTHONPATH only when it is set, to avoid an empty entry)
+ENV PYTHONPATH=/opt/src${PYTHONPATH:+:$PYTHONPATH}
+
+WORKDIR /opt/src
diff --git a/datasets/sentinel-5p/Dockerfile b/datasets/sentinel-5p/Dockerfile
new file mode 100644
index 00000000..828da64b
--- /dev/null
+++ b/datasets/sentinel-5p/Dockerfile
@@ -0,0 +1,77 @@
+FROM ubuntu:20.04
+
+# Setup timezone info
+ENV TZ=UTC
+
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Install OS packages in a single layer and clean the apt cache and package
+# lists in that same layer; removing them in a later layer would not shrink
+# the image.
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository ppa:ubuntugis/ppa && \
+    apt-get update && \
+    apt-get install -y build-essential python3-dev python3-pip \
+        jq unzip ca-certificates wget curl git && \
+    apt-get autoremove -y && apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
+
+# See https://github.com/mapbox/rasterio/issues/1289
+ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt
+
+# Install Python 3.8
+# Miniforge3 includes mamba; the separate Mambaforge installer is deprecated.
+# curl -f fails on an HTTP error instead of saving the error page as the script.
+RUN curl -fL -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
+    && bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
+    && rm -rf "Miniforge3-$(uname)-$(uname -m).sh"
+
+ENV PATH=/opt/conda/bin:$PATH
+# Append the previous LD_LIBRARY_PATH only when it is set, so the value never
+# ends in an empty entry (which the loader treats as the current directory).
+ENV LD_LIBRARY_PATH=/opt/conda/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+
+RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5
+
+RUN python -m pip install --upgrade pip
+
+# Install common packages
+COPY requirements-task-base.txt /tmp/requirements.txt
+RUN python -m pip install --no-build-isolation -r /tmp/requirements.txt
+
+#
+# Copy and install packages
+#
+
+COPY pctasks/core /opt/src/pctasks/core
+RUN pip install /opt/src/pctasks/core
+
+COPY pctasks/cli /opt/src/pctasks/cli
+RUN pip install /opt/src/pctasks/cli
+
+COPY pctasks/task /opt/src/pctasks/task
+RUN pip install /opt/src/pctasks/task
+
+COPY pctasks/client /opt/src/pctasks/client
+RUN pip install /opt/src/pctasks/client
+
+COPY pctasks/ingest /opt/src/pctasks/ingest
+RUN pip install /opt/src/pctasks/ingest
+
+COPY pctasks/dataset /opt/src/pctasks/dataset
+RUN pip install /opt/src/pctasks/dataset
+
+COPY ./datasets/sentinel-5p/requirements.txt /opt/src/datasets/sentinel-5p/requirements.txt
+RUN python3 -m pip install -r /opt/src/datasets/sentinel-5p/requirements.txt
+
+# Setup Python Path to allow import of test modules
+# (append the previous PYTHONPATH only when it is set, to avoid an empty entry)
+ENV PYTHONPATH=/opt/src${PYTHONPATH:+:$PYTHONPATH}
+
+WORKDIR /opt/src
diff --git a/datasets/sentinel-5p/README.md b/datasets/sentinel-5p/README.md index 1bfda52a..1f2b162a 100644 --- a/datasets/sentinel-5p/README.md +++ b/datasets/sentinel-5p/README.md @@ -1 +1,11 @@ -# Work in progress - in a broken state \ No newline at end of file +# planetary-computer-tasks dataset: sentinel-5p + +Sentinel 5 Precursor + +## Building the Docker image + +To build and push a custom docker image to our container registry: + +```shell +az acr build -r {the registry} --subscription {the subscription} -t pctasks-sentinel-5p:latest -f datasets/sentinel-5p/Dockerfile . +``` diff --git a/datasets/sentinel-5p/collection/description.md b/datasets/sentinel-5p/collection/description.md new file mode 100644 index 00000000..fa391880 --- /dev/null +++ b/datasets/sentinel-5p/collection/description.md @@ -0,0 +1,19 @@ +The Copernicus [Sentinel-5 Precursor](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-5p) mission provides high spatio-temporal resolution measurements of the Earth's atmosphere. The mission consists of one satellite carrying the [TROPOspheric Monitoring Instrument](http://www.tropomi.eu/) (TROPOMI).
The satellite flies in loose formation with NASA's [Suomi NPP](https://www.nasa.gov/mission_pages/NPP/main/index.html) spacecraft, allowing utilization of co-located cloud mask data provided by the [Visible Infrared Imaging Radiometer Suite](https://www.nesdis.noaa.gov/current-satellite-missions/currently-flying/joint-polar-satellite-system/visible-infrared-imaging) (VIIRS) instrument onboard Suomi NPP during processing of the TROPOMI methane product. + +The Sentinel-5 Precursor mission aims to reduce the global atmospheric data gap between the retired [ENVISAT](https://earth.esa.int/eogateway/missions/envisat) and [AURA](https://www.nasa.gov/mission_pages/aura/main/index.html) missions and the future [Sentinel-5](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-5) mission. Sentinel-5 Precursor [Level 2 data](http://www.tropomi.eu/data-products/level-2-products) provide total columns of ozone, sulfur dioxide, nitrogen dioxide, carbon monoxide and formaldehyde, tropospheric columns of ozone, vertical profiles of ozone and cloud & aerosol information. These measurements are used for improving air quality forecasts and monitoring the concentrations of atmospheric constituents. 
+ +This STAC Collection provides Sentinel-5 Precursor Level 2 data, in NetCDF format, since April 2018 for the following products: + +* [`L2__AER_AI`](http://www.tropomi.eu/data-products/uv-aerosol-index): Ultraviolet aerosol index +* [`L2__AER_LH`](http://www.tropomi.eu/data-products/aerosol-layer-height): Aerosol layer height +* [`L2__CH4___`](http://www.tropomi.eu/data-products/methane): Methane (CH4) total column +* [`L2__CLOUD_`](http://www.tropomi.eu/data-products/cloud): Cloud fraction, albedo, and top pressure +* [`L2__CO____`](http://www.tropomi.eu/data-products/carbon-monoxide): Carbon monoxide (CO) total column +* [`L2__HCHO__`](http://www.tropomi.eu/data-products/formaldehyde): Formaldehyde (HCHO) total column +* [`L2__NO2___`](http://www.tropomi.eu/data-products/nitrogen-dioxide): Nitrogen dioxide (NO2) total column +* [`L2__O3____`](http://www.tropomi.eu/data-products/total-ozone-column): Ozone (O3) total column +* [`L2__O3_TCL`](http://www.tropomi.eu/data-products/tropospheric-ozone-column): Ozone (O3) tropospheric column +* [`L2__SO2___`](http://www.tropomi.eu/data-products/sulphur-dioxide): Sulfur dioxide (SO2) total column +* [`L2__NP_BD3`](http://www.tropomi.eu/data-products/auxiliary): Cloud from the Suomi NPP mission, band 3 +* [`L2__NP_BD6`](http://www.tropomi.eu/data-products/auxiliary): Cloud from the Suomi NPP mission, band 6 +* [`L2__NP_BD7`](http://www.tropomi.eu/data-products/auxiliary): Cloud from the Suomi NPP mission, band 7 diff --git a/datasets/sentinel-5p/collection/template.json b/datasets/sentinel-5p/collection/template.json new file mode 100755 index 00000000..db13cd24 --- /dev/null +++ b/datasets/sentinel-5p/collection/template.json @@ -0,0 +1,247 @@ +{ + "stac_version": "1.0.0", + "type": "Collection", + "id": "sentinel-5p-l2-netcdf", + "title": "Sentinel-5P Level-2", + "description": "{{ collection.description }}", + "license": "proprietary", + "links": [ + { + "rel": "license", + "href": 
"https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice", + "type": "application/pdf", + "title": "Sentinel Data License" + }, + { + "rel": "about", + "href": "https://sentinel.esa.int/web/sentinel/missions/sentinel-5p", + "type": "text/html", + "title": "Sentinel-5 Precursor Mission" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/sat/v1.0.0/schema.json", + "https://stac-extensions.github.io/table/v1.2.0/schema.json", + "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json" + ], + "keywords": [ + "ESA", + "Copernicus", + "Sentinel", + "Air Quality", + "Climate Change", + "Forecasting" + ], + "msft:short_description": "Sentinel-5P Level 2 atmospheric monitoring products in NetCDF format", + "msft:storage_account": "sentinel5euwest", + "msft:container": "sentinel-5p", + "msft:region": "westeurope", + "providers": [ + { + "name": "ESA", + "roles": [ + "producer", + "processor", + "licensor" + ], + "url": "https://earth.esa.int/web/guest/home" + }, + { + "name": "Microsoft", + "roles": [ + "host" + ], + "url": "https://planetarycomputer.microsoft.com" + } + ], + "assets": { + "thumbnail": { + "title": "Sentinel-5P Level-2 NetCDF Thumbnail", + "href": "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/sentinel-5p-l2-netcdf-thumb.png", + "media_type": "image/png" + }, + "geoparquet-items": { + "href": "abfs://items/sentinel-5p-l2-netcdf.parquet", + "title": "GeoParquet STAC Items", + "description": "Snapshot of the collection's STAC items exported to GeoParquet format.", + "type": "application/x-parquet", + "roles": [ + "stac-items" + ], + "table:storage_options": { + "account_name": "pcstacitems" + }, + "msft:partition_info": { + "is_partitioned": true, + "partition_frequency": "MS" + } + } + }, + "summaries": { + "constellation": [ + "Sentinel-5P" + ], + "platform": [ + "Sentinel 5 Precursor" + ], + "instruments": [ + "TROPOMI" + ], + "sat:platform_international_designator": [ + 
"2017-064A" + ], + "s5p:collection_identifier": [ + "01", + "02", + "03" + ], + "s5p:processing_mode": [ + "NRTI", + "OFFL", + "RPRO" + ], + "s5p:product_type": [ + "L2__AER_AI", + "L2__AER_LH", + "L2__CH4___", + "L2__CLOUD_", + "L2__CO____", + "L2__HCHO__", + "L2__NO2___", + "L2__NP_BD3", + "L2__NP_BD6", + "L2__NP_BD7", + "L2__O3_TCL", + "L2__O3____", + "L2__SO2___" + ], + "s5p:product_name": [ + "aer-ai", + "aer-lh", + "ch4", + "cloud", + "co", + "hcho", + "no2", + "np-bd3", + "np-bd6", + "np-bd7", + "o3-tcl", + "o3", + "so2" + ] + }, + "extent": { + "spatial": { + "bbox": [ + [ + -180, + -90, + 180, + 90 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2018-04-30T00:18:50Z", + null + ] + ] + } + }, + "item_assets": { + "aer-ai": { + "title": "Ultraviolet Aerosol Index", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "aer-lh": { + "title": "Aerosol Layer Height", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "ch4": { + "title": "Methane Total Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "cloud": { + "title": "Cloud Fraction, Albedo, and Top Pressure", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "co": { + "title": "Carbon Monoxide Total Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "hcho": { + "title": "Formaldehyde Total Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "no2": { + "title": "Nitrogen Dioxide Total Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "o3": { + "title": "Ozone Total Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "o3-tcl": { + "title": "Ozone Tropospheric Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "so2": { + "title": "Sulphur Dioxide Total Column", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "np-bd3": { + "title": "VIIRS/NPP Band 3 Cloud Mask", + "type": 
"application/x-netcdf", + "roles": [ + "data" + ] + }, + "np-bd6": { + "title": "VIIRS/NPP Band 6 Cloud Mask", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + }, + "np-bd7": { + "title": "VIIRS/NPP Band 7 Cloud Mask", + "type": "application/x-netcdf", + "roles": [ + "data" + ] + } + } +} \ No newline at end of file diff --git a/datasets/sentinel-5p/dataset.yaml b/datasets/sentinel-5p/dataset.yaml index 8e9e9cd6..a68c7bb6 100644 --- a/datasets/sentinel-5p/dataset.yaml +++ b/datasets/sentinel-5p/dataset.yaml @@ -1,12 +1,42 @@ -id: sentinel-1-grd +id: sentinel_5p +image: ${{ args.registry }}/pctasks-sentinel-5p:latest + +args: + - registry + +code: + src: ${{ local.path(./sentinel_5p.py) }} + +environment: + AZURE_TENANT_ID: ${{ secrets.task-tenant-id }} + AZURE_CLIENT_ID: ${{ secrets.task-client-id }} + AZURE_CLIENT_SECRET: ${{ secrets.task-client-secret }} + collections: - id: sentinel-1-grd - containers: - assets: sentinel1euwest/s1-grd - stac: sentinel1euwest/s1-grd-stac - etl-data: sentinel1euwest/s1-grd-etl-info -triggers: - storage: - account: sentinel1euwest - container: s1-grd-stac - filter: "*.json" + - id: sentinel-5p-l2-netcdf + template: ${{ local.path(./collection) }} + class: sentinel_5p:Sentinel5pNetCDFCollection + asset_storage: + # The blob storage pattern is + # + # | sentinel-5p-st2ac/ + # | TROPOMI/ + # | L2_AER_AI/ + # | 2018/ + # | 06/ + # | 28/ + # | ... + # | L2_AER_LH + # | ... 
+ # | + # We want to split by product (L2_AER_AI) + - uri: blob://sentinel5euwest/sentinel-5p-stac/TROPOMI/ + token: ${{ pc.get_token(sentinel5euwest, sentinel-5p-stac) }} + chunks: + options: + extensions: [.json] + chunk_length: 5000 + splits: + - depth: 1 + chunk_storage: + uri: blob://sentinel5euwest/sentinel-5p-etl-data/pctasks-chunks/ diff --git a/datasets/sentinel-5p/requirements.txt b/datasets/sentinel-5p/requirements.txt new file mode 100644 index 00000000..f546863c --- /dev/null +++ b/datasets/sentinel-5p/requirements.txt @@ -0,0 +1 @@ +antimeridian==0.2.2 \ No newline at end of file diff --git a/datasets/sentinel-5p/s5/__init__.py b/datasets/sentinel-5p/s5/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/datasets/sentinel-5p/sentinel-5p-l2.json b/datasets/sentinel-5p/sentinel-5p-l2.json deleted file mode 100755 index 98dd3c13..00000000 --- a/datasets/sentinel-5p/sentinel-5p-l2.json +++ /dev/null @@ -1,132 +0,0 @@ -{ - "type": "Collection", - "id": "sentinel-5p-l2", - "stac_version": "1.0.0", - "description": "The Copernicus [Sentinel-5 Precursor mission](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-5p) is dedicated to monitoring our atmosphere and consists of one satellite carrying the TROPOspheric Monitoring Instrument (TROPOMI). Sentinel-5 Precursor mission aims to fill in the global atmospheric data gap between the retired ENVISAT and AURA missions and the future Sentinel-5 mission. The main objective of TROPOMI is to provide daily global observations of key atmospheric constituents related to air quality, ozone layer and climate change monitoring and forecasting. Level 2 data provide total columns of ozone, sulfur dioxide, nitrogen dioxide, carbon monoxide and formaldehyde, tropospheric columns of ozone, vertical profiles of ozone and cloud & aerosol information. Level 2 data are available since April 2018. 
These measurements are used for improving air quality forecasts as well as for monitoring the concentrations of atmospheric constituents.", - "links": [ - { - "rel": "license", - "href": "https://sentinel.esa.int/documents/247904/690755/Sentinel_Data_Legal_Notice" - }, - { - "rel": "root", - "href": "./sentinel-5p-l2.json", - "type": "application/json" - }, - { - "rel": "self", - "href": "https://sentinel5euwest.blob.core.windows.net/sentinel-5p-stac/sentinel-5p-l2.json", - "type": "application/json" - } - ], - "stac_extensions": [ - "https://stac-extensions.github.io/sat/v1.0.0/schema.json" - ], - "extent": { - "spatial": { - "bbox": [ - [ - -180, - -85, - 180, - 85 - ] - ] - }, - "temporal": { - "interval": [ - [ - "2018-04-30T00:18:50Z", - null - ] - ] - } - }, - "license": "proprietary", - "keywords": [ - "eu", - "esa", - "copernicus", - "sentinel", - "systematic", - "aerosols", - "air quality", - "climate change", - "ozone", - "forecasting", - "N02" - ], - "providers": [ - { - "name": "ESA", - "roles": [ - "producer", - "processor", - "licensor" - ], - "url": "https://earth.esa.int/web/guest/home" - }, - { - "name": "Microsoft", - "roles": [ - "host", - "processor" - ], - "url": "https://planetarycomputer.microsoft.com" - } - ], - "msft:container": "sentinel-5p", - "msft:region": "westeurope", - "msft:storage_account": "sentinel5euwest", - "msft:short_description": "The Copernicus [Sentinel-5 Precursor mission](https://sentinels.copernicus.eu/web/sentinel/missions/sentinel-5p) is dedicated to monitoring our atmosphere and consists of one satellite carrying the TROPOspheric Monitoring Instrument (TROPOMI). 
Level 2 data provide total columns of ozone, sulfur dioxide, nitrogen dioxide, carbon monoxide and formaldehyde, tropospheric columns of ozone, vertical profiles of ozone and cloud & aerosol information.", - "summaries": { - "constellation": [ - "Sentinel-5P" - ], - "platform" : [ - "Sentinel 5 Precursor" - ], - "instruments" : [ - "TROPOMI" - ], - "sat:platform_international_designator": [ - "2017-064A" - ], - "sat:absolute_orbit" : { - "minimum": 1, - "maximum": 2147483647 - }, - "s5p:processing_mode": [ - "NRTI", - "OFFL", - "RPRO" - ], - "s5p:product_type": [ - "L2__AER_AI", - "L2__AER_LH", - "L2__CH4___", - "L2__CLOUD_", - "L2__CO____", - "L2__HCHO__", - "L2__NO2___", - "L2__NP_BD3", - "L2__NP_BD6", - "L2__NP_BD7", - "L2__O3____", - "L2__O3_TCL", - "L2__SO2___" - ], - "s5p:spatial_resolution": [ - "5.5x3.5 km2", - "5.5x7 km2", - "7x3.5 km2", - "7x7 km2" - ], - "s5p:shape": { - "dimensions": "scanline x ground_pixel", - "scanline": "The dimension that indicates the flight direction", - "ground_pixel": "The dimension perpendicular to the flight direction" - } - }, - "title": "Sentinel-5P Level-2" -}
diff --git a/datasets/sentinel-5p/sentinel_5p.py b/datasets/sentinel-5p/sentinel_5p.py
new file mode 100644
index 00000000..5355f489
--- /dev/null
+++ b/datasets/sentinel-5p/sentinel_5p.py
@@ -0,0 +1,247 @@
+import logging
+import re
+from pathlib import Path
+from typing import Any, Dict, List, Union
+
+import antimeridian
+import pystac
+import shapely.geometry
+from shapely.geometry import Polygon
+
+from pctasks.core.models.task import WaitTaskResult
+from pctasks.core.storage import StorageFactory
+from pctasks.dataset.collection import Collection
+
+# Pattern for the stem of a Sentinel-5P NetCDF filename. Only the "product" and
+# "collection" groups are read in this module.
+# NOTE(review): the remaining group names follow the field order of the S5P
+# file naming convention - confirm against the product specification.
+FILENAME_EXPR = re.compile(
+    r"S5P_(?P<mode>[A-Z]{4})_L(?P<level>[0-9]{1})_(?P<product>.{7})_"
+    r"(?P<start_datetime>[0-9,A-Z]{15})_(?P<end_datetime>[0-9,A-Z]{15})_"
+    r"(?P<orbit>[0-9]{5})_(?P<collection>[0-9]{2})_(?P<processor_version>[0-9]{6})_"
+    r"(?P<production_datetime>[0-9,A-Z]{15})"
+)
+
+# Landing page of each product type; used for the "about" link of an item.
+ABOUT_LINKS = {
+    "L2__AER_AI": "http://www.tropomi.eu/data-products/uv-aerosol-index",
+    "L2__AER_LH": "http://www.tropomi.eu/data-products/aerosol-layer-height",
+    "L2__CH4___": "http://www.tropomi.eu/data-products/methane",
+    "L2__CLOUD_": "http://www.tropomi.eu/data-products/cloud",
+    "L2__CO____": "http://www.tropomi.eu/data-products/carbon-monoxide",
+    "L2__HCHO__": "http://www.tropomi.eu/data-products/formaldehyde",
+    "L2__NO2___": "http://www.tropomi.eu/data-products/nitrogen-dioxide",
+    "L2__O3____": "http://www.tropomi.eu/data-products/total-ozone-column",
+    "L2__O3_TCL": "http://www.tropomi.eu/data-products/tropospheric-ozone-column",
+    "L2__SO2___": "http://www.tropomi.eu/data-products/sulphur-dioxide",
+    "L2__NP_BD3": "https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-5p/products-algorithms",  # noqa
+    "L2__NP_BD6": "https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-5p/products-algorithms",  # noqa
+    "L2__NP_BD7": "https://sentinel.esa.int/web/sentinel/technical-guides/sentinel-5p/products-algorithms",  # noqa
+}
+
+# Title of the data asset for each product type.
+ASSET_TITLES = {
+    "L2__AER_AI": "Ultraviolet Aerosol Index",
+    "L2__AER_LH": "Aerosol Layer Height",
+    "L2__CH4___": "Methane Total Column",
+    "L2__CLOUD_": "Cloud Fraction, Albedo, and Top Pressure",
+    "L2__CO____": "Carbon Monoxide Total Column",
+    "L2__HCHO__": "Formaldehyde Total Column",
+    "L2__NO2___": "Nitrogen Dioxide Total Column",
+    "L2__O3____": "Ozone Total Column",
+    "L2__O3_TCL": "Ozone Tropospheric Column",
+    "L2__SO2___": "Sulphur Dioxide Total Column",
+    "L2__NP_BD3": "VIIRS/NPP Band 3 Cloud Mask",
+    "L2__NP_BD6": "VIIRS/NPP Band 6 Cloud Mask",
+    "L2__NP_BD7": "VIIRS/NPP Band 7 Cloud Mask",
+}
+
+# Fixed footprint assigned to every tropospheric ozone column (o3-tcl) item.
+O3_TCL_GEOMETRY = shapely.geometry.mapping(
+    Polygon([(-180, -19.75), (180, -19.75), (180, 19.75), (-180, 19.75)])
+)
+O3_TCL_BBOX = [-180, -19.75, 180, 19.75]
+
+handler = logging.StreamHandler()
+handler.setFormatter(logging.Formatter("[%(levelname)s]:%(asctime)s: %(message)s"))
+handler.setLevel(logging.INFO)
+logger = logging.getLogger(__name__)
+logger.addHandler(handler)
+logger.setLevel(logging.INFO)
+
+
+def recursive_round(coordinates: List[Any], precision: int) -> List[Any]:
+    """Rounds a list of numbers. The list can contain additional nested lists
+    or tuples of numbers.
+
+    Any tuples encountered will be converted to lists.
+
+    Args:
+        coordinates (List[Any]): A list of numbers, possibly containing nested
+            lists or tuples of numbers.
+        precision (int): Number of decimal places to use for rounding.
+
+    Returns:
+        List[Any]: The list of numbers rounded to the given precision.
+    """
+    rounded: List[Any] = []
+    for value in coordinates:
+        if isinstance(value, (int, float)):
+            rounded.append(round(value, precision))
+        else:
+            rounded.append(recursive_round(list(value), precision))
+    return rounded
+
+
+def _fix_datetimes(fields: Dict[str, Any]) -> None:
+    """Replaces a trailing "ZZ" with "Z" on the datetime values of a dictionary.
+
+    Only keys ending in "datetime" are examined. Values that are not strings
+    (for example a null datetime) are left untouched. The dictionary is
+    updated in place.
+
+    Args:
+        fields (Dict[str, Any]): The dictionary to correct.
+    """
+    for key, value in fields.items():
+        if (
+            key.endswith("datetime")
+            and isinstance(value, str)
+            and value.endswith("ZZ")
+        ):
+            fields[key] = value[:-2] + "Z"
+
+
+class Sentinel5pNetCDFCollection(Collection):  # type: ignore
+    """Creates items for the Sentinel-5P Level-2 NetCDF collection from
+    existing Item JSON files.
+    """
+
+    @classmethod
+    def create_item(
+        cls, asset_uri: str, storage_factory: StorageFactory
+    ) -> Union[List[pystac.Item], WaitTaskResult]:
+        """Reads an Item JSON file and reshapes it into the published Item.
+
+        Args:
+            asset_uri (str): URI of the Item JSON file.
+            storage_factory (StorageFactory): Factory that provides the storage
+                holding ``asset_uri``.
+
+        Returns:
+            Union[List[pystac.Item], WaitTaskResult]: A list containing the
+                single updated Item.
+
+        Raises:
+            ValueError: If the NetCDF filename or the spatial resolution
+                cannot be parsed.
+            KeyError: If an expected field is missing from the Item JSON or
+                the product type has no entry in ``ASSET_TITLES``.
+        """
+        storage, json_path = storage_factory.get_storage_for_file(asset_uri)
+        item_dict = storage.read_json(json_path)
+
+        netcdf_filename = item_dict["assets"]["data"]["href"]
+        match = FILENAME_EXPR.match(Path(netcdf_filename).stem)
+        if not match:
+            raise ValueError(f"Could not parse filename {Path(json_path).stem}")
+
+        prefix = match.groupdict()["product"].strip("_").lower()
+        if prefix.startswith("np"):
+            prefix = prefix.replace("_", "")
+
+        collection_identifier = match.groupdict()["collection"]
+        product_type = item_dict["properties"]["s5p:product_type"]
+        # e.g. "L2__O3_TCL" -> "o3-tcl"; remove the literal "L2" prefix, since
+        # str.lstrip("L2_") strips a set of characters rather than a prefix
+        product_name = (
+            re.sub(r"^L2_*", "", product_type).rstrip("_").lower().replace("_", "-")
+        )
+
+        # ---- PROPERTIES ----
+        properties = item_dict.pop("properties")
+
+        # providers should be supplied in the collection, not the item
+        properties.pop("providers", None)
+
+        # combine the product custom properties to a single object; match on
+        # "<prefix>:" so a short prefix such as "co" cannot capture an unrelated
+        # key that merely starts with the same letters
+        key_prefix = f"{prefix}:"
+        keys = [k for k in properties.keys() if str(k).startswith(key_prefix)]
+        product_custom_fields = {
+            str(key)[len(key_prefix) :]: properties.pop(key) for key in keys
+        }
+        if product_custom_fields:
+            properties[f"s5p:{prefix}"] = product_custom_fields
+
+        # add sentinel-5p collection_identifier
+        properties["s5p:collection_identifier"] = collection_identifier
+
+        # convert spatial resolution to meters, store in list to match sentinel-3
+        # order is [height, width], aka [along track, across track]
+        resolution = properties["s5p:spatial_resolution"]
+        parts = re.sub(r"\s*km2?\s*$", "", resolution.strip()).split("x")
+        if len(parts) != 2:
+            raise ValueError(f"Could not parse spatial resolution {resolution!r}")
+        # scale to meters before converting to int so that fractional
+        # kilometers are kept, e.g. "5.5x3.5 km2" -> [5500, 3500]
+        properties["s5p:spatial_resolution"] = [
+            round(float(p) * 1000) for p in parts
+        ]
+
+        # correct bad datetimes
+        _fix_datetimes(properties)
+        _fix_datetimes(product_custom_fields)
+
+        # add a user-friendly product name
+        properties["s5p:product_name"] = product_name
+
+        item_dict["properties"] = properties
+
+        # ---- ASSETS ----
+        asset = item_dict["assets"].pop("data")
+
+        # the supplied asset description is too brief to be a description and
+        # too inconsistent to use as a title; use a custom title instead
+        asset.pop("description", None)
+        asset["title"] = ASSET_TITLES[product_type]
+
+        item_dict["assets"][product_name] = asset
+
+        # ---- LINKS ----
+        # license is the same for all items; include on the collection, not the
+        # item. A new list is built because removing from a list while
+        # iterating over it skips the element after each removal.
+        links = [
+            link for link in item_dict.pop("links") if link["rel"] != "license"
+        ]
+
+        # add a unique link to the product landing page
+        links.append(
+            {
+                "rel": "about",
+                "href": ABOUT_LINKS.get(product_type),
+                "type": "text/html",
+            }
+        )
+
+        item_dict["links"] = links
+
+        # ---- GEOMETRY ----
+        # fix antimeridian, except for o3_tcl, where we do some hardcode hacks instead
+        item = pystac.Item.from_dict(item_dict)
+        if product_name == "o3-tcl":
+            item.geometry = O3_TCL_GEOMETRY
+            item.bbox = O3_TCL_BBOX
+        else:
+            polygon = shapely.geometry.shape(item.geometry)
+            geometry = antimeridian.fix_polygon(polygon)
+            item.bbox = list(geometry.bounds)
+            item.geometry = shapely.geometry.mapping(geometry)
+            item.bbox = recursive_round(item.bbox, precision=6)
+            item.geometry["coordinates"] = recursive_round(  # type: ignore
+                list(item.geometry["coordinates"]), precision=6  # type: ignore
+            )
+
+        return [item]
diff --git a/datasets/sentinel-5p/summarize-wf.yaml b/datasets/sentinel-5p/summarize-wf.yaml deleted file mode 100644 index 5d28333d..00000000 --- a/datasets/sentinel-5p/summarize-wf.yaml +++ /dev/null @@ -1,71 +0,0 @@ -name: Summarize Sentinel 5P L2 Items - -dataset: - owner: microsoft - name: sentinel-5p-l2 - -args: -- registry - -jobs: - create-splits: - tasks: - - id: list-prefixes - image: ${{ args.registry }}/pctasks-task-base:latest - task: pctasks.task.common.list_prefixes:task - args: - src_uri: blob://sentinel5euwest/sentinel-5p-stac - depth: 4 - environment: - AZURE_TENANT_ID: ${{ secrets.task-tenant-id }} - AZURE_CLIENT_ID: ${{ secrets.task-client-id }} - AZURE_CLIENT_SECRET: ${{ secrets.task-client-secret }} - schema_version: 1.0.0 - summarize-map: - foreach: - items: ${{ jobs.create-splits.tasks.list-prefixes.output.uris }} - tasks: - - id: list-files - image: ${{ args.registry }}/pctasks-task-base:latest - task: pctasks.task.common.list_files:task - args: - src_uri: ${{ item }} - extensions: - - .json - environment: - AZURE_TENANT_ID: ${{ secrets.task-tenant-id }} - AZURE_CLIENT_ID: ${{ secrets.task-client-id }} - AZURE_CLIENT_SECRET: ${{ secrets.task-client-secret }} - schema_version: 1.0.0 - - id: summarize-map - image: ${{ args.registry }}/pctasks-task-base:latest - task: pctasks.task.common.summarize:map_task - args: - uris: ${{ tasks.list-files.output.uris }} - include_keys: - - assets - - properties - environment: - AZURE_TENANT_ID: ${{ secrets.task-tenant-id }} - AZURE_CLIENT_ID: ${{ secrets.task-client-id }} - AZURE_CLIENT_SECRET: ${{
secrets.task-client-secret }} - schema_version: 1.0.0 - summarize-reduce: - tasks: - - id: summarize-reduce - image: ${{ args.registry }}/pctasks-task-base:latest - task: pctasks.task.common.summarize:reduce_task - args: - summaries: ${{ jobs.summarize-map.tasks.summarize-map.output.summary }} - - id: write-output - image: ${{ args.registry }}/pctasks-task-base:latest - task: pctasks.task.common.write:task - args: - uri: blob://sentinel5euwest/sentinel-5p-etl-data/summaries/2022-09-08.json - content: ${{ tasks.summarize-reduce.output.summary }} - environment: - AZURE_TENANT_ID: ${{ secrets.task-tenant-id }} - AZURE_CLIENT_ID: ${{ secrets.task-client-id }} - AZURE_CLIENT_SECRET: ${{ secrets.task-client-secret }} - schema_version: 1.0.0 -