From f46d57dcc403d3e689488815038fc94befe60241 Mon Sep 17 00:00:00 2001 From: Nathan Clerkx Date: Fri, 15 Dec 2023 15:59:48 +0100 Subject: [PATCH] Basic report functionality --- App/functions/report-python-cloud-run/main.py | 18 ++- .../report/datasets/__init__.py | 0 .../report/datasets/base_dataset.py | 14 ++- .../report/datasets/datasetcontent.py | 11 ++ .../report/datasets/esl.py | 104 ++++++++++++++++++ .../report-python-cloud-run/report/report.py | 77 +++++++++++-- .../report/template.html.jinja | 11 +- .../report/utils/__init__.py | 0 .../report/utils/stac.py | 20 +++- .../report/utils/zarr_slicing.py | 2 +- .../report-python-cloud-run/requirements.txt | 7 +- 11 files changed, 241 insertions(+), 23 deletions(-) create mode 100644 App/functions/report-python-cloud-run/report/datasets/__init__.py create mode 100644 App/functions/report-python-cloud-run/report/datasets/datasetcontent.py create mode 100644 App/functions/report-python-cloud-run/report/utils/__init__.py diff --git a/App/functions/report-python-cloud-run/main.py b/App/functions/report-python-cloud-run/main.py index d6130d113..239efe61d 100644 --- a/App/functions/report-python-cloud-run/main.py +++ b/App/functions/report-python-cloud-run/main.py @@ -1,16 +1,28 @@ from io import BytesIO +import json import os -from flask import Flask, make_response +from shapely import Polygon # type: ignore +from shapely.geometry import shape # type: ignore +from flask import Flask, make_response, request -from report.report import create_report_pdf +from report.report import ( + create_report_html, + create_report_pdf, + POLYGON_DEFAULT, + STAC_ROOT_DEFAULT, +) app = Flask(__name__) @app.route("/") def return_report(): - pdf_object: BytesIO = create_report_pdf() + polygon_str = POLYGON_DEFAULT + geo: dict = json.loads(polygon_str) + polygon: Polygon = shape(geo) + web_page_content = create_report_html(polygon=polygon, stac_root=STAC_ROOT_DEFAULT) + pdf_object = create_report_pdf(web_page_content) response = make_response(pdf_object.getvalue()) response.headers["Content-Type"] = "application/pdf" diff --git a/App/functions/report-python-cloud-run/report/datasets/__init__.py b/App/functions/report-python-cloud-run/report/datasets/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/App/functions/report-python-cloud-run/report/datasets/base_dataset.py b/App/functions/report-python-cloud-run/report/datasets/base_dataset.py index a26a968f6..39313b39e 100644 --- a/App/functions/report-python-cloud-run/report/datasets/base_dataset.py +++ b/App/functions/report-python-cloud-run/report/datasets/base_dataset.py @@ -1 +1,13 @@ -from abc import ABC +from typing import Optional +import xarray as xr + +from .datasetcontent import DatasetContent +from .esl import get_esl_content + + +def get_dataset_content(dataset_id: str, xarr: xr.Dataset) -> Optional[DatasetContent]: + match dataset_id: + case "esl_gwl": + return get_esl_content(xarr) + case _: + return None diff --git a/App/functions/report-python-cloud-run/report/datasets/datasetcontent.py b/App/functions/report-python-cloud-run/report/datasets/datasetcontent.py new file mode 100644 index 000000000..12f7679a3 --- /dev/null +++ b/App/functions/report-python-cloud-run/report/datasets/datasetcontent.py @@ -0,0 +1,11 @@ +from dataclasses import dataclass +from typing import Optional + + +@dataclass +class DatasetContent: + dataset_id: str + title: str + text: str + image_base64: Optional[str] = None + image_svg: Optional[str] = None diff --git a/App/functions/report-python-cloud-run/report/datasets/esl.py b/App/functions/report-python-cloud-run/report/datasets/esl.py index e69de29bb..d3c2dc02c 100644 --- a/App/functions/report-python-cloud-run/report/datasets/esl.py +++ b/App/functions/report-python-cloud-run/report/datasets/esl.py @@ -0,0 +1,104 @@ +import base64 +from io import BytesIO, StringIO +import matplotlib + +matplotlib.use("Agg") +from matplotlib import colors +from matplotlib import pyplot as plt +import numpy as np +import xarray as xr + +import geopandas as gpd + +from .datasetcontent import DatasetContent + + +def get_esl_content(xarr: xr.Dataset) -> DatasetContent: + """Get content for ESL dataset""" + dataset_id = "esl" + title = "Extreme Sea Level" + text = "Here we generate some content based on the ESL dataset" + + image_base64 = create_esl_plot(xarr) + return DatasetContent( + dataset_id=dataset_id, + title=title, + text=text, + image_base64=image_base64, + ) + + +def create_esl_plot(xarr): + GWL = 0 # look at ds.gwl.values for options + GWLs = "present-day" + # ens = 50 # look at ds.ensemble.values for options + rp = 50.0 # look at ds.rp.values for options + world = gpd.read_file( + """https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/world-administrative-boundaries/exports/shp?lang=en&timezone=Europe%2FBerlin""" + ) + cmap = matplotlib.cm.RdYlGn_r + norm = colors.BoundaryNorm(np.arange(0, 7.5, 0.5), cmap.N) + ds_fil = xarr.sel(gwl=GWL, rp=rp) # filter the other params + lonmin = min(ds_fil.lon.values) + lonmax = max(ds_fil.lon.values) + latmin = min(ds_fil.lat.values) + latmax = max(ds_fil.lat.values) + fig, ax = plt.subplots() + fig.set_size_inches(15, 8) # fig.set_size_inches(15, 20) + base = world.boundary.plot( + ax=ax, edgecolor="grey", facecolor="grey", alpha=0.1, zorder=0 + ) + im1 = ax.scatter( + ds_fil.lon.values, + ds_fil.lat.values, + 10 * ds_fil.sel(ensemble=5).esl.values, + ds_fil.sel(ensemble=5).esl.values, + cmap=cmap, + norm=norm, + zorder=1, + ) + # plt.set_clim(0,5) + im2 = ax.scatter( + ds_fil.lon.values, + ds_fil.lat.values + 0.1, + 10 * ds_fil.sel(ensemble=50).esl.values, + ds_fil.sel(ensemble=50).esl.values, + cmap=cmap, + norm=norm, + zorder=1, + ) + im3 = ax.scatter( + ds_fil.lon.values, + ds_fil.lat.values + 0.2, + 10 * ds_fil.sel(ensemble=95).esl.values, + ds_fil.sel(ensemble=95).esl.values, + cmap=cmap, + norm=norm, + zorder=1, + ) + ax.set_title("%s-year extreme sea level for %s global warming level" % (rp, GWLs)) + ax.axis("square") + ax.set( + xlabel="lon", + ylabel="lat", + xlim=[lonmin - 2, lonmax + 2], + ylim=[latmin - 2, latmax + 2], + ) + # fig.colorbar(im1, ax=ax) + im1.set_clim(0, 7) + + cax = fig.add_axes( + [ + ax.get_position().x1 + 0.01, + ax.get_position().y0, + 0.02, + ax.get_position().height, + ] + ) # to give colorbar own axes + plt.colorbar(im1, cax=cax) # Similar to fig.colorbar(im, cax = cax) + cax.set_title("ESL in meters") + # + imgdata = BytesIO() + fig.savefig(imgdata, format="png") + + return base64.b64encode(imgdata.getbuffer()).decode("ascii") diff --git a/App/functions/report-python-cloud-run/report/report.py b/App/functions/report-python-cloud-run/report/report.py index 3b5891069..dc77bb7ae 100644 --- a/App/functions/report-python-cloud-run/report/report.py +++ b/App/functions/report-python-cloud-run/report/report.py @@ -1,22 +1,46 @@ +from dataclasses import dataclass from io import BytesIO from pathlib import Path -import fitz -from shapely import Polygon +import fitz # type: ignore +import jinja2 +from shapely import Polygon # type: ignore +from report.utils.stac import STACClientGCA, ZarrDataset +from report.utils.zarr_slicing import ZarrSlicer +from report.datasets.datasetcontent import DatasetContent +from report.datasets.base_dataset import get_dataset_content -def create_report_html(): - pass +POLYGON_DEFAULT = """{"coordinates":[[[2.3915028831735015,51.7360381463356],[5.071438932343227,50.89406012060684],[6.955992986278972,51.49577449585874],[7.316959036046541,53.18700330195111],[6.636226617140238,53.961350092621075],[3.8631377106468676,54.14643052276938],[2.1218958391276317,53.490771261555096],[2.3915028831735015,51.7360381463356]]],"type":"Polygon"}""" +STAC_ROOT_DEFAULT = "https://raw.githubusercontent.com/openearth/global-coastal-atlas/subsidence_etienne/STAC/data/current/catalog.json" -def create_report_pdf() -> BytesIO: - story = fitz.Story() - htmlpath = Path(__file__).parent / Path("template.html") +@dataclass +class WebPageContent: + html: str + css: str + + +@dataclass +class ReportContent: + datasets: list[DatasetContent] + + +def create_report_html(polygon: Polygon, stac_root: str) -> WebPageContent: + htmlpath = Path(__file__).parent / Path("template.html.jinja") csspath = Path(__file__).parent / Path("template.css") - HTML = htmlpath.read_bytes().decode() - CSS = csspath.read_bytes().decode() + with htmlpath.open() as f: + template = jinja2.Template(f.read()) + + data = generate_report_content(polygon=polygon, stac_root=stac_root) + html = template.render(data=data) + css: str = csspath.read_bytes().decode() + + return WebPageContent(html=html, css=css) - story = fitz.Story(html=HTML, user_css=CSS) + +def create_report_pdf(page_content: WebPageContent) -> BytesIO: + story = fitz.Story(html=page_content.html, user_css=page_content.css) MEDIABOX = fitz.paper_rect("A4") # output page format: Letter WHERE = MEDIABOX + (36, 36, -36, -36) # leave borders of 0.5 inches @@ -34,6 +58,37 @@ def create_report_pdf() -> BytesIO: return in_memory_pdf +def generate_report_content(polygon: Polygon, stac_root: str) -> ReportContent: + gca_client = STACClientGCA.open(stac_root) + zarr_datasets: list[ZarrDataset] = gca_client.get_all_zarr_uris() # type: ignore + + dataset_contents: list[DatasetContent] = [] + for zarr_dataset in zarr_datasets: + xarr = ZarrSlicer._get_dataset_from_zarr_url(zarr_dataset.zarr_uri) + sliced_xarr = ZarrSlicer.slice_xarr_with_polygon(xarr, polygon) + if ZarrSlicer.check_xarr_contains_data(sliced_xarr): + dataset_content = get_dataset_content(zarr_dataset.dataset_id, sliced_xarr) + if dataset_content: + dataset_contents.append(dataset_content) + + return ReportContent(datasets=dataset_contents) + + if __name__ == "__main__": - pdf = create_report_pdf() + polygon = Polygon( + [ + [2.3915028831735015, 51.7360381463356], + [5.071438932343227, 50.89406012060684], + [6.955992986278972, 51.49577449585874], + [7.316959036046541, 53.18700330195111], + [6.636226617140238, 53.961350092621075], + [3.8631377106468676, 54.14643052276938], + [2.1218958391276317, 53.490771261555096], + [2.3915028831735015, 51.7360381463356], + ] + ) + + pdf = create_report_pdf( + create_report_html(polygon=polygon, stac_root=STAC_ROOT_DEFAULT) + ) print(pdf.getvalue()) diff --git a/App/functions/report-python-cloud-run/report/template.html.jinja b/App/functions/report-python-cloud-run/report/template.html.jinja index c80a56413..081bcbd65 100644 --- a/App/functions/report-python-cloud-run/report/template.html.jinja +++ b/App/functions/report-python-cloud-run/report/template.html.jinja @@ -1,4 +1,13 @@

Coastal report

In this report some coastal characteristics of the selected area are explained.

-{% for dataset in datasets %} \ No newline at end of file +{% for dataset in data.datasets %} +

{{ dataset.title }}

+ {% if dataset.image_svg %} + + {% endif %} +

{{ dataset.text }}

+ {% if dataset.image_base64 %} + + {% endif %} +{% endfor %} \ No newline at end of file diff --git a/App/functions/report-python-cloud-run/report/utils/__init__.py b/App/functions/report-python-cloud-run/report/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/App/functions/report-python-cloud-run/report/utils/stac.py b/App/functions/report-python-cloud-run/report/utils/stac.py index 7d02bd8c8..0e51100b3 100644 --- a/App/functions/report-python-cloud-run/report/utils/stac.py +++ b/App/functions/report-python-cloud-run/report/utils/stac.py @@ -1,18 +1,28 @@ -from collections import namedtuple +from dataclasses import dataclass from pystac_client import Client -Zarr_dataset = namedtuple("Zarr_dataset", ["dataset_id", "zarr_uri"]) + +@dataclass +class ZarrDataset: + dataset_id: str + zarr_uri: str class STACClientGCA(Client): - def get_all_zarr_uris(self) -> list[Zarr_dataset]: + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get_all_zarr_uris(self) -> list[ZarrDataset]: collections = self.get_collections() zarr_datasets = [] for collection in collections: # we only look at collections that have a child links - if collection.get_child_links(): + if collection.get_item_links(): zarr_datasets.append( - Zarr_dataset(collection.id, collection.assets["data"].href) + ZarrDataset( + dataset_id=collection.id, + zarr_uri=collection.assets["data"].href, + ) ) return zarr_datasets diff --git a/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py b/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py index e3b044201..a14d3b7ff 100644 --- a/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py +++ b/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py @@ -66,7 +66,7 @@ def check_xarr_contains_data(xarr: xr.Dataset) -> bool: Returns: bool: True if xarray dataset contains data """ - return not np.isnan(xarr).all() + return xarr.nbytes > 0 @staticmethod def _get_dataset_type(xarr: xr.Dataset) -> DatasetType: diff --git a/App/functions/report-python-cloud-run/requirements.txt b/App/functions/report-python-cloud-run/requirements.txt index 0d044b425..a945e44ad 100644 --- a/App/functions/report-python-cloud-run/requirements.txt +++ b/App/functions/report-python-cloud-run/requirements.txt @@ -2,7 +2,12 @@ Flask~=3.0.0 gunicorn~=21.2.0 shapely~=2.0.2 xarray~=2023.12.0 +zarr~=2.16.1 +fsspec~=2023.12.2 +aiohttp~=3.9.1 pystac~=1.9.0 pystac-client~=0.7.5 pymupdf~=1.23.5 -Jinja2~=3.1.0 \ No newline at end of file +Jinja2~=3.1.0 +matplotlib~=3.8.2 +geopandas~=0.14.1 \ No newline at end of file