Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use DriverVectorCube in chunk_polygon Open-EO/openeo-python-driver#288 #801

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions openeogeotrellis/geopysparkdatacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,9 +572,7 @@ def partitionByKey(spatialkey):
def chunk_polygon(
self,
reducer: Union[ProcessGraphVisitor, Dict],
# TODO: it's wrong to use MultiPolygon as a collection of polygons. MultiPolygons should be handled as single, atomic "features"
# also see https://github.com/Open-EO/openeo-python-driver/issues/288
chunks: MultiPolygon,
chunks: DriverVectorCube,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This changes an API signature defined in openeo-python-driver. I don't think there is a PR for that yet.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is, you can find it here: Open-EO/openeo-python-driver#291

mask_value: float,
env: EvalEnv,
context: Optional[dict] = None,
Expand All @@ -584,27 +582,20 @@ def chunk_polygon(

if isinstance(reducer, dict):
reducer = GeoPySparkBackendImplementation.accept_process_graph(reducer)
chunks: List[Polygon] = chunks.geoms
jvm = get_jvm()

result_collection = None
if isinstance(reducer, SingleNodeUDFProcessGraphVisitor):
udf, udf_context = self._extract_udf_code_and_context(process=reducer, context=context, env=env)
# Polygons should use the same projection as the rdd.
# TODO Usage of GeometryCollection should be avoided. It's abused here like a FeatureCollection,
# but a GeometryCollections is conceptually just single "feature".
# What you want here is proper support for FeatureCollections or at least a list of individual geometries.
# also see https://github.com/Open-EO/openeo-python-driver/issues/71, https://github.com/Open-EO/openeo-python-driver/issues/288
reprojected_polygons: jvm.org.openeo.geotrellis.ProjectedPolygons \
= to_projected_polygons(jvm, GeometryCollection(chunks))
= to_projected_polygons(jvm, chunks)
band_names = self.metadata.band_dimension.band_names

def rdd_function(rdd, _zoom):
return jvm.org.openeo.geotrellis.udf.Udf.runChunkPolygonUserCode(
udf, rdd, reprojected_polygons, band_names, udf_context, mask_value
)

# All JEP implementation work with float cell types.
float_cube = self.apply_to_levels(lambda layer: self._convert_celltype(layer, "float32"))
result_collection = float_cube._apply_to_levels_geotrellis_rdd(
rdd_function, self.metadata, gps.LayerType.SPACETIME
Expand Down
5 changes: 3 additions & 2 deletions tests/test_chunk_polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from shapely.geometry import Polygon, MultiPolygon

from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
from openeo_driver.datacube import DriverVectorCube
from .data import get_test_data_file, TEST_DATA_ROOT


Expand All @@ -14,7 +15,7 @@
#
# Note: In order to run these tests you need to set several environment variables.
# If you use the virtual environment venv (with JEP and Numpy installed):
# 1. LD_LIBRARY_PATH = .../venv/lib/python3.6/site-packages/jep
# 1. LD_LIBRARY_PATH = .../venv/lib/python3.8/site-packages/jep
# This will look for the shared library 'jep.so'. This is the compiled C code that binds Java and Python objects.

def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
Expand All @@ -36,7 +37,7 @@ def test_chunk_polygon(imagecollection_with_two_bands_and_three_dates):
env = EvalEnv()

polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
chunks = MultiPolygon([polygon1])
chunks = DriverVectorCube.from_geometry(polygon1)
cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
result_layer: TiledRasterLayer = result_cube.pyramid.levels[0]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_error.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from geopyspark import TiledRasterLayer, Extent
from openeo_driver.utils import EvalEnv
from py4j.protocol import Py4JJavaError
from shapely.geometry import MultiPolygon

from openeogeotrellis.backend import GeoPySparkBackendImplementation
from openeogeotrellis.geopysparkdatacube import GeopysparkDataCube
from openeogeotrellis.utils import get_jvm
from openeo_driver.datacube import DriverVectorCube


# Note: Ensure that the python environment has all the required modules installed.
Expand Down Expand Up @@ -44,7 +44,7 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube:
env = EvalEnv()

polygon1 = Extent(0.0, 0.0, 4.0, 4.0).to_polygon
chunks = MultiPolygon([polygon1])
chunks = DriverVectorCube.from_geometry(polygon1)
cube: GeopysparkDataCube = imagecollection_with_two_bands_and_three_dates
try:
result_cube: GeopysparkDataCube = cube.chunk_polygon(udf_add_to_bands, chunks=chunks, mask_value=None, env=env)
Expand Down