From c1f348c5450f005ea5d4432c5ff4faac34f7850b Mon Sep 17 00:00:00 2001 From: Nathan Clerkx Date: Thu, 14 Dec 2023 17:13:51 +0100 Subject: [PATCH] wip pdf generation --- .../report-python-cloud-run/README.md | 21 +++ App/functions/report-python-cloud-run/main.py | 27 ++-- .../report/__init__.py | 0 .../report/datasets/base_dataset.py | 1 + .../report/datasets/esl.py | 0 .../report-python-cloud-run/report/report.py | 39 +++++ .../report/template.css | 0 .../report/template.html.jinja | 4 + .../report/utils/stac.py | 18 +++ .../report/utils/zarr_slicing.py | 133 ++++++++++++++++++ .../report-python-cloud-run/requirements.txt | 14 +- 11 files changed, 234 insertions(+), 23 deletions(-) create mode 100644 App/functions/report-python-cloud-run/README.md create mode 100644 App/functions/report-python-cloud-run/report/__init__.py create mode 100644 App/functions/report-python-cloud-run/report/datasets/base_dataset.py create mode 100644 App/functions/report-python-cloud-run/report/datasets/esl.py create mode 100644 App/functions/report-python-cloud-run/report/report.py create mode 100644 App/functions/report-python-cloud-run/report/template.css create mode 100644 App/functions/report-python-cloud-run/report/template.html.jinja create mode 100644 App/functions/report-python-cloud-run/report/utils/stac.py create mode 100644 App/functions/report-python-cloud-run/report/utils/zarr_slicing.py diff --git a/App/functions/report-python-cloud-run/README.md b/App/functions/report-python-cloud-run/README.md new file mode 100644 index 000000000..32c4e397b --- /dev/null +++ b/App/functions/report-python-cloud-run/README.md @@ -0,0 +1,21 @@ +# Developing the report-python-cloud-run Function + +## Prerequisites + +Create a virtual environment and install the dependencies: + +```bash +pip install -r requirements.txt +``` + +## Testing + +Run the report function locally: + +```bash +python report/report.py +``` + +## Deploying + +Deploying to Cloud Run is done using GitHub Actions. 
The workflow is defined in `.github/workflows/deploy_function.yml`. The workflow is triggered on push to the `main` branch. \ No newline at end of file diff --git a/App/functions/report-python-cloud-run/main.py b/App/functions/report-python-cloud-run/main.py index a5cf5a911..d6130d113 100644 --- a/App/functions/report-python-cloud-run/main.py +++ b/App/functions/report-python-cloud-run/main.py @@ -1,28 +1,21 @@ -# Copyright 2020 Google, LLC. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +from io import BytesIO import os -from flask import Flask +from flask import Flask, make_response + +from report.report import create_report_pdf app = Flask(__name__) @app.route("/") def return_report(): - name = os.environ.get("NAME", "World") - return f"Hello {name}!" 
+ pdf_object: BytesIO = create_report_pdf() + + response = make_response(pdf_object.getvalue()) + response.headers["Content-Type"] = "application/pdf" + response.headers["Content-Disposition"] = "inline; filename=coastal_report.pdf" + return response if __name__ == "__main__": diff --git a/App/functions/report-python-cloud-run/report/__init__.py b/App/functions/report-python-cloud-run/report/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/App/functions/report-python-cloud-run/report/datasets/base_dataset.py b/App/functions/report-python-cloud-run/report/datasets/base_dataset.py new file mode 100644 index 000000000..a26a968f6 --- /dev/null +++ b/App/functions/report-python-cloud-run/report/datasets/base_dataset.py @@ -0,0 +1 @@ +from abc import ABC diff --git a/App/functions/report-python-cloud-run/report/datasets/esl.py b/App/functions/report-python-cloud-run/report/datasets/esl.py new file mode 100644 index 000000000..e69de29bb diff --git a/App/functions/report-python-cloud-run/report/report.py b/App/functions/report-python-cloud-run/report/report.py new file mode 100644 index 000000000..3b5891069 --- /dev/null +++ b/App/functions/report-python-cloud-run/report/report.py @@ -0,0 +1,39 @@ +from io import BytesIO +from pathlib import Path +import fitz +from shapely import Polygon + + +def create_report_html(): + pass + + +def create_report_pdf() -> BytesIO: + story = fitz.Story() + htmlpath = Path(__file__).parent / Path("template.html") + csspath = Path(__file__).parent / Path("template.css") + + HTML = htmlpath.read_bytes().decode() + CSS = csspath.read_bytes().decode() + + story = fitz.Story(html=HTML, user_css=CSS) + + MEDIABOX = fitz.paper_rect("A4") # output page format: Letter + WHERE = MEDIABOX + (36, 36, -36, -36) # leave borders of 0.5 inches + in_memory_pdf = BytesIO() + writer = fitz.DocumentWriter(in_memory_pdf) + + with fitz.DocumentWriter(in_memory_pdf) as writer: + more = 1 + while more: + device = writer.begin_page(MEDIABOX) 
+ more, _ = story.place(WHERE) + story.draw(device) + writer.end_page() + + return in_memory_pdf + + +if __name__ == "__main__": + pdf = create_report_pdf() + print(pdf.getvalue()) diff --git a/App/functions/report-python-cloud-run/report/template.css b/App/functions/report-python-cloud-run/report/template.css new file mode 100644 index 000000000..e69de29bb diff --git a/App/functions/report-python-cloud-run/report/template.html.jinja b/App/functions/report-python-cloud-run/report/template.html.jinja new file mode 100644 index 000000000..c80a56413 --- /dev/null +++ b/App/functions/report-python-cloud-run/report/template.html.jinja @@ -0,0 +1,4 @@ +

Coastal report

+

This report explains some coastal characteristics of the selected area.

+ +{% for dataset in datasets %} \ No newline at end of file diff --git a/App/functions/report-python-cloud-run/report/utils/stac.py b/App/functions/report-python-cloud-run/report/utils/stac.py new file mode 100644 index 000000000..7d02bd8c8 --- /dev/null +++ b/App/functions/report-python-cloud-run/report/utils/stac.py @@ -0,0 +1,18 @@ +from collections import namedtuple +from pystac_client import Client + +Zarr_dataset = namedtuple("Zarr_dataset", ["dataset_id", "zarr_uri"]) + + +class STACClientGCA(Client): + def get_all_zarr_uris(self) -> list[Zarr_dataset]: + collections = self.get_collections() + zarr_datasets = [] + + for collection in collections: + # we only look at collections that have a child links + if collection.get_child_links(): + zarr_datasets.append( + Zarr_dataset(collection.id, collection.assets["data"].href) + ) + return zarr_datasets diff --git a/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py b/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py new file mode 100644 index 000000000..e3b044201 --- /dev/null +++ b/App/functions/report-python-cloud-run/report/utils/zarr_slicing.py @@ -0,0 +1,133 @@ +from enum import Enum +import shapely # type: ignore +import xarray as xr +import numpy as np + + +class DatasetType(Enum): + RASTER = "raster" + POINT = "point" + + +class ZarrSlicer: + @staticmethod + def get_sliced_dataset(geojson_str: str, zarr_uri: str) -> xr.Dataset: + """Fetch Zarr from remote store and slice with geojson polygon + + Args: + geojson_str (str): String containing geojson polygon + zarr_uri (str): String containing zarr uri + + Returns: + xr.Dataset: sliced lazy loaded zarr dataset + """ + polygon_shape = ZarrSlicer._create_shape_from_geojson(geojson_str) + zarr = ZarrSlicer._get_dataset_from_zarr_url(zarr_uri) + sliced_zarr = ZarrSlicer.slice_xarr_with_polygon(zarr, polygon_shape) + return sliced_zarr + + @staticmethod + def slice_xarr_with_polygon( + xarr: xr.Dataset, polygon: shapely.Polygon + 
) -> xr.Dataset: + """Slice xarray dataset with geojson polygon + + Args: + xarr (xr.Dataset): xarray dataset + polygon (Polygon): geojson polygon + + Returns: + xr.Dataset: sliced xarray dataset + """ + dataset_type = ZarrSlicer._get_dataset_type(xarr) + + if dataset_type == DatasetType.RASTER: + spatial_dims = ZarrSlicer._get_spatial_dimensions(xarr) + indexer = ZarrSlicer._get_indexer_from_raster(xarr, polygon, spatial_dims) + elif dataset_type == DatasetType.POINT: + points = ZarrSlicer._create_points_from_xarr(xarr) + boolean_mask = ZarrSlicer._get_boolean_mask_from_points(points, polygon) + spatial_dims = ZarrSlicer._get_spatial_dimensions(xarr) + + indexer = {spatial_dims[0]: boolean_mask} + else: + raise ValueError("Dataset type not supported") + + sliced_xarr = xarr.sel(indexer) + return sliced_xarr + + @staticmethod + def check_xarr_contains_data(xarr: xr.Dataset) -> bool: + """Check if xarray dataset contains data + + Args: + xarr (xr.Dataset): xarray dataset + + Returns: + bool: True if xarray dataset contains data + """ + return not np.isnan(xarr).all() + + @staticmethod + def _get_dataset_type(xarr: xr.Dataset) -> DatasetType: + """Get dataset type from xarray dataset. 
We differentiate between + raster and point datasets""" + # if lat and lon are dimensions, we assume it is a raster dataset + if "lat" in xarr.dims and "lon" in xarr.dims: + return DatasetType.RASTER + else: + return DatasetType.POINT + + @staticmethod + def _create_points_from_xarr(xarr: xr.Dataset) -> shapely.MultiPoint: + """Create shapely multipoint from xarray dataset""" + lats = xarr.coords["lat"].values + lons = xarr.coords["lon"].values + points = shapely.points(lons, lats) + return points + + @staticmethod + def _get_spatial_dimensions(xarr: xr.Dataset) -> list[str]: + """Get spatial dimension from xarray dataset""" + dims = {xarr.lat.dims[0], xarr.lon.dims[0]} + return list(dims) + + @staticmethod + def _get_boolean_mask_from_points( + points: shapely.MultiPoint, polygon: shapely.Polygon + ) -> [bool]: + """Get boolean mask from points and polygon""" + return shapely.within(points, polygon) + + @staticmethod + def _get_indexer_from_raster( + raster: xr.Dataset, polygon: shapely.Polygon, spatial_dims: list[str] + ) -> [bool]: + """Get boolean mask from raster and polygon""" + spatial_dim_size = {dim: len(raster[dim].values) for dim in spatial_dims} + + coords = np.stack( + np.meshgrid(raster[spatial_dims[1]].values, raster[spatial_dims[0]].values), + -1, + ).reshape( + spatial_dim_size[spatial_dims[0]], spatial_dim_size[spatial_dims[1]], 2 + ) + + raster_points = shapely.points(coords) + + mask = shapely.within(raster_points, polygon) + + # Reduce mask to square shape + # TODO: create point wise indexing for DataSet; + indexer = {"lat": mask.any(axis=1), "lon": mask.any(axis=0)} + return indexer + + @staticmethod + def _create_shape_from_geojson(geojson: str) -> shapely.Polygon: + """Create shapely polygon from geojson polygon""" + return shapely.from_geojson(geojson) + + @staticmethod + def _get_dataset_from_zarr_url(url: str) -> xr.Dataset: + """Get zarr store from url""" + return xr.open_zarr(url) diff --git 
a/App/functions/report-python-cloud-run/requirements.txt b/App/functions/report-python-cloud-run/requirements.txt index 0416b7802..0d044b425 100644 --- a/App/functions/report-python-cloud-run/requirements.txt +++ b/App/functions/report-python-cloud-run/requirements.txt @@ -1,6 +1,8 @@ -Flask -gunicorn -shapely -xarray -pystac -reportlab \ No newline at end of file +Flask~=3.0.0 +gunicorn~=21.2.0 +shapely~=2.0.2 +xarray~=2023.12.0 +pystac~=1.9.0 +pystac-client~=0.7.5 +pymupdf~=1.23.5 +Jinja2~=3.1.0 \ No newline at end of file