Skip to content

Commit

Permalink
wip pdf generation
Browse files Browse the repository at this point in the history
  • Loading branch information
N-Clerkx committed Dec 14, 2023
1 parent 06fd390 commit c1f348c
Show file tree
Hide file tree
Showing 11 changed files with 234 additions and 23 deletions.
21 changes: 21 additions & 0 deletions App/functions/report-python-cloud-run/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Developing the report-python-cloud-run Function

## Prerequisites

Create a virtual environment and install the dependencies:

```bash
pip install -r requirements.txt
```

## Testing

Run the report function locally:

```bash
python report/report.py
```

## Deploying

Deployment to Cloud Run is handled by GitHub Actions. The workflow is defined in `.github/workflows/deploy_function.yml` and is triggered on every push to the `main` branch.
27 changes: 10 additions & 17 deletions App/functions/report-python-cloud-run/main.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,21 @@
# Copyright 2020 Google, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from io import BytesIO
import os

from flask import Flask
from flask import Flask, make_response

from report.report import create_report_pdf

app = Flask(__name__)


@app.route("/")
def return_report():
    """Serve the generated coastal report as an inline PDF.

    Returns:
        The Flask response built by ``make_response`` from the bytes of the
        buffer returned by ``create_report_pdf``, with ``Content-Type`` set to
        ``application/pdf`` and an inline ``Content-Disposition`` naming the
        file ``coastal_report.pdf``.
    """
    # No early return may precede this point: the handler's only job is to
    # build the PDF and hand it back to the client.
    pdf_object: BytesIO = create_report_pdf()

    response = make_response(pdf_object.getvalue())
    response.headers["Content-Type"] = "application/pdf"
    # "inline" asks the browser to display the PDF rather than download it.
    response.headers["Content-Disposition"] = "inline; filename=coastal_report.pdf"
    return response


if __name__ == "__main__":
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from abc import ABC
Empty file.
39 changes: 39 additions & 0 deletions App/functions/report-python-cloud-run/report/report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from io import BytesIO
from pathlib import Path
import fitz
from shapely import Polygon


def create_report_html():
    """Placeholder for HTML report generation.

    Not implemented yet: takes no arguments, does nothing and returns ``None``.
    """
    return None


def create_report_pdf() -> BytesIO:
    """Render the HTML/CSS report template into an in-memory PDF.

    Reads ``template.html`` and ``template.css`` from the directory that
    contains this module, lays the resulting story out on A4 pages and writes
    those pages into a ``BytesIO`` buffer.

    Returns:
        BytesIO: buffer holding the generated PDF, rewound to position 0 so
        that both ``getvalue()`` and ``read()`` yield the whole document.
    """
    template_dir = Path(__file__).parent
    html = (template_dir / "template.html").read_text(encoding="utf-8")
    css = (template_dir / "template.css").read_text(encoding="utf-8")

    story = fitz.Story(html=html, user_css=css)

    mediabox = fitz.paper_rect("A4")  # output page format: A4
    where = mediabox + (36, 36, -36, -36)  # leave borders of 0.5 inches
    in_memory_pdf = BytesIO()

    # Exactly one writer is opened on the buffer, and the context manager
    # closes it, so no second, never-closed writer competes for the stream.
    with fitz.DocumentWriter(in_memory_pdf) as writer:
        more = 1
        while more:  # story.place() reports whether content is left over
            device = writer.begin_page(mediabox)
            more, _ = story.place(where)
            story.draw(device)
            writer.end_page()

    in_memory_pdf.seek(0)
    return in_memory_pdf


if __name__ == "__main__":
    # Manual smoke run: build the report and print the raw PDF bytes.
    report_buffer = create_report_pdf()
    raw_pdf = report_buffer.getvalue()
    print(raw_pdf)
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<h1>Coastal report</h1>
<p>In this report some coastal characteristics of the selected area are explained.</p>

{% for dataset in datasets %}
18 changes: 18 additions & 0 deletions App/functions/report-python-cloud-run/report/utils/stac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from collections import namedtuple
from pystac_client import Client

# Immutable record pairing a dataset id with the URI of its Zarr data.
Zarr_dataset = namedtuple("Zarr_dataset", "dataset_id zarr_uri")


class STACClientGCA(Client):
    """``pystac_client.Client`` subclass with a helper that lists Zarr datasets."""

    def get_all_zarr_uris(self) -> list[Zarr_dataset]:
        """Return the id and "data" asset href of each collection with child links.

        Returns:
            list[Zarr_dataset]: one ``Zarr_dataset(collection.id, href)`` per
            collection from ``get_collections()`` whose ``get_child_links()``
            result is truthy; ``href`` is taken from the collection's
            ``"data"`` asset.
        """
        return [
            Zarr_dataset(entry.id, entry.assets["data"].href)
            for entry in self.get_collections()
            # only collections that have child links are considered
            if entry.get_child_links()
        ]
133 changes: 133 additions & 0 deletions App/functions/report-python-cloud-run/report/utils/zarr_slicing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
from enum import Enum
import shapely # type: ignore
import xarray as xr
import numpy as np


class DatasetType(Enum):
    """Kinds of dataset distinguished when slicing: raster or point."""

    # Treated as a grid: both "lat" and "lon" are dimensions of the dataset.
    RASTER = "raster"
    # Any dataset that is not classified as a raster.
    POINT = "point"


class ZarrSlicer:
    """Static helpers that open a Zarr dataset and slice it with a polygon."""

    @staticmethod
    def get_sliced_dataset(geojson_str: str, zarr_uri: str) -> xr.Dataset:
        """Fetch Zarr from remote store and slice with geojson polygon

        Args:
            geojson_str (str): String containing geojson polygon
            zarr_uri (str): String containing zarr uri

        Returns:
            xr.Dataset: sliced lazy loaded zarr dataset
        """
        polygon_shape = ZarrSlicer._create_shape_from_geojson(geojson_str)
        zarr = ZarrSlicer._get_dataset_from_zarr_url(zarr_uri)
        return ZarrSlicer.slice_xarr_with_polygon(zarr, polygon_shape)

    @staticmethod
    def slice_xarr_with_polygon(
        xarr: xr.Dataset, polygon: shapely.Polygon
    ) -> xr.Dataset:
        """Slice xarray dataset with geojson polygon

        Raster datasets are reduced to the lat/lon rows and columns that hold
        at least one grid point inside the polygon; point datasets keep only
        the points that lie within the polygon.

        Args:
            xarr (xr.Dataset): xarray dataset
            polygon (Polygon): geojson polygon

        Returns:
            xr.Dataset: sliced xarray dataset

        Raises:
            ValueError: if the dataset type is neither raster nor point
        """
        dataset_type = ZarrSlicer._get_dataset_type(xarr)
        spatial_dims = ZarrSlicer._get_spatial_dimensions(xarr)

        if dataset_type == DatasetType.RASTER:
            indexer = ZarrSlicer._get_indexer_from_raster(xarr, polygon, spatial_dims)
        elif dataset_type == DatasetType.POINT:
            points = ZarrSlicer._create_points_from_xarr(xarr)
            boolean_mask = ZarrSlicer._get_boolean_mask_from_points(points, polygon)
            # lat and lon share one dimension here, so a single mask suffices
            indexer = {spatial_dims[0]: boolean_mask}
        else:
            raise ValueError("Dataset type not supported")

        return xarr.sel(indexer)

    @staticmethod
    def check_xarr_contains_data(xarr: xr.Dataset) -> bool:
        """Check if xarray dataset contains data

        Args:
            xarr (xr.Dataset): xarray dataset (a DataArray is accepted too)

        Returns:
            bool: True if at least one value in the dataset is not NaN/null
        """
        if isinstance(xarr, xr.DataArray):
            return bool(xarr.notnull().any())
        # ``not dataset`` must not be used for this: the truth value of a
        # Dataset only tells whether it has data variables, not whether their
        # values are NaN. Each variable is therefore reduced to a plain bool.
        return any(bool(var.notnull().any()) for var in xarr.data_vars.values())

    @staticmethod
    def _get_dataset_type(xarr: xr.Dataset) -> DatasetType:
        """Get dataset type from xarray dataset. We differentiate between
        raster and point datasets"""
        # if lat and lon are dimensions, we assume it is a raster dataset
        if "lat" in xarr.dims and "lon" in xarr.dims:
            return DatasetType.RASTER
        return DatasetType.POINT

    @staticmethod
    def _create_points_from_xarr(xarr: xr.Dataset) -> np.ndarray:
        """Create an array of shapely points (lon, lat) from the dataset's
        ``lat`` and ``lon`` coordinates"""
        lats = xarr.coords["lat"].values
        lons = xarr.coords["lon"].values
        return shapely.points(lons, lats)

    @staticmethod
    def _get_spatial_dimensions(xarr: xr.Dataset) -> list[str]:
        """Get spatial dimensions from xarray dataset

        Returns:
            list[str]: first dimension of ``lat`` followed by first dimension
            of ``lon``; a single entry when both share the same dimension
        """
        lat_dim = xarr.lat.dims[0]
        lon_dim = xarr.lon.dims[0]
        # dict.fromkeys de-duplicates while keeping the [lat, lon] order; a
        # set would make the order arbitrary and callers index into the list.
        return list(dict.fromkeys((lat_dim, lon_dim)))

    @staticmethod
    def _get_boolean_mask_from_points(
        points: np.ndarray, polygon: shapely.Polygon
    ) -> np.ndarray:
        """Get boolean mask from points and polygon (True where the point is
        within the polygon)"""
        return shapely.within(points, polygon)

    @staticmethod
    def _get_indexer_from_raster(
        raster: xr.Dataset, polygon: shapely.Polygon, spatial_dims: list[str]
    ) -> dict[str, np.ndarray]:
        """Get per-dimension boolean indexer from raster and polygon

        Args:
            raster (xr.Dataset): raster dataset
            polygon (Polygon): polygon to slice with
            spatial_dims (list[str]): ``[lat_dim, lon_dim]``, in that order

        Returns:
            dict[str, np.ndarray]: boolean mask per spatial dimension, True
            for every row/column with at least one grid point in the polygon
        """
        lat_dim, lon_dim = spatial_dims

        # meshgrid (default "xy" indexing) yields arrays of shape
        # (n_lat, n_lon): axis 0 follows lat, axis 1 follows lon.
        lon_grid, lat_grid = np.meshgrid(raster[lon_dim].values, raster[lat_dim].values)
        raster_points = shapely.points(lon_grid, lat_grid)

        mask = shapely.within(raster_points, polygon)

        # Reduce mask to square shape
        # TODO: create point wise indexing for DataSet;
        return {lat_dim: mask.any(axis=1), lon_dim: mask.any(axis=0)}

    @staticmethod
    def _create_shape_from_geojson(geojson: str) -> shapely.Polygon:
        """Create shapely polygon from geojson polygon"""
        return shapely.from_geojson(geojson)

    @staticmethod
    def _get_dataset_from_zarr_url(url: str) -> xr.Dataset:
        """Get zarr store from url"""
        return xr.open_zarr(url)
14 changes: 8 additions & 6 deletions App/functions/report-python-cloud-run/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Flask
gunicorn
shapely
xarray
pystac
reportlab
Flask~=3.0.0
gunicorn~=21.2.0
shapely~=2.0.2
xarray~=2023.12.0
pystac~=1.9.0
pystac-client~=0.7.5
pymupdf~=1.23.5
Jinja2~=3.1.0

0 comments on commit c1f348c

Please sign in to comment.