Skip to content

Commit

Permalink
Add CRS support for queries (#58)
Browse files Browse the repository at this point in the history
* Initial CRS support

* Update readme

* IDK how this is failing... it works fine in notebook

* lint

* Fix test

* Format

* Convert proj to queries crs

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* format

* Selection done, with some hackiness

* Add projection to plugin

* Fix area subsetting, add tests for pluign

* Add test to plugin

* Remove print

Co-authored-by: Deepak Cherian <[email protected]>

* Update tests/test_cf_router.py

Co-authored-by: Deepak Cherian <[email protected]>

* Update tests/test_select.py

Co-authored-by: Deepak Cherian <[email protected]>

* Update xpublish_edr/geometry/area.py

Co-authored-by: Deepak Cherian <[email protected]>

* Fix coord drops

* Require lat and lng for projecting

* Cleaner broadcast op

* Explicit transpose to cleanup dataset on projection

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Deepak Cherian <[email protected]>
  • Loading branch information
3 people authored Nov 15, 2024
1 parent a0cc958 commit 04affad
Show file tree
Hide file tree
Showing 8 changed files with 322 additions and 31 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/
| `z` || |
| `datetime` || |
| `parameter-name` || |
| `crs` | | Not currently supported, all coordinates should be in the reference system of the queried dataset |
| `crs` | | Requires a CF compliant [grid mapping](https://cf-xarray.readthedocs.io/en/latest/grid_mappings.html) on the target dataset. Default is `EPSG:4326` |
| `parameter-name` || |
| `f` || |
| `method` || Optional: controls data selection. Use "nearest" for nearest neighbor selection, or "linear" for interpolated selection. Uses `nearest` if not specified |
Expand All @@ -84,7 +84,7 @@ In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/
| `z` || |
| `datetime` || |
| `parameter-name` || |
| `crs` | | Not currently supported, all coordinates should be in the reference system of the queried dataset |
| `crs` | | Requires a CF compliant [grid mapping](https://cf-xarray.readthedocs.io/en/latest/grid_mappings.html) on the target dataset. Default is `EPSG:4326` |
| `parameter-name` || |
| `f` || |
| `method` || Optional: controls data selection. Use "nearest" for nearest neighbor selection, or "linear" for interpolated selection. Uses `nearest` if not specified |
Expand Down
72 changes: 56 additions & 16 deletions tests/test_cf_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,25 @@


@pytest.fixture(scope="session")
def cf_dataset():
def cf_air_dataset():
from cf_xarray.datasets import airds

return airds


@pytest.fixture(scope="session")
def cf_xpublish(cf_dataset):
rest = xpublish.Rest({"air": cf_dataset}, plugins={"edr": CfEdrPlugin()})
def cf_temp_dataset():
from cf_xarray.datasets import rotds

return rotds


@pytest.fixture(scope="session")
def cf_xpublish(cf_air_dataset, cf_temp_dataset):
rest = xpublish.Rest(
{"air": cf_air_dataset, "temp": cf_temp_dataset},
plugins={"edr": CfEdrPlugin()},
)

return rest

Expand Down Expand Up @@ -52,7 +62,7 @@ def test_cf_area_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_position_query(cf_client, cf_dataset):
def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):
x = 204
y = 44
response = cf_client.get(f"/datasets/air/edr/position?coords=POINT({x} {y})")
Expand All @@ -76,17 +86,18 @@ def test_cf_position_query(cf_client, cf_dataset):
air_param = data["parameters"]["air"]

assert (
air_param["unit"]["label"]["en"] == cf_dataset["air"].attrs["units"]
air_param["unit"]["label"]["en"] == cf_air_dataset["air"].attrs["units"]
), "DataArray units should be set as parameter units"
assert (
air_param["observedProperty"]["id"] == cf_dataset["air"].attrs["standard_name"]
air_param["observedProperty"]["id"]
== cf_air_dataset["air"].attrs["standard_name"]
), "DataArray standard_name should be set as the observed property id"
assert (
air_param["observedProperty"]["label"]["en"]
== cf_dataset["air"].attrs["long_name"]
== cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter observed property"
assert (
air_param["description"]["en"] == cf_dataset["air"].attrs["long_name"]
air_param["description"]["en"] == cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter description"

air_range = data["ranges"]["air"]
Expand All @@ -99,6 +110,34 @@ def test_cf_position_query(cf_client, cf_dataset):
len(air_range["values"]) == 4
), "There should be 4 values, one for each time step"

# Test with a dataset containing data in a different coordinate system
x = 64.59063409
y = 66.66454929
response = cf_client.get(f"/datasets/temp/edr/position?coords=POINT({x} {y})")
assert response.status_code == 200, "Response did not return successfully"

data = response.json()
for key in ("type", "domain", "parameters", "ranges"):
assert key in data, f"Key {key} is not a top level key in the CovJSON response"

axes = data["domain"]["axes"]

npt.assert_array_almost_equal(
axes["longitude"]["values"],
[[x]],
), "Did not select nearby x coordinate"
npt.assert_array_almost_equal(
axes["latitude"]["values"],
[[y]],
), "Did not select a nearby y coordinate"

temp_range = data["ranges"]["temp"]
assert temp_range["type"] == "NdArray", "Response range should be a NdArray"
assert temp_range["dataType"] == "float", "Air dataType should be floats"
assert temp_range["axisNames"] == ["rlat", "rlon"], "All dimensions should persist"
assert temp_range["shape"] == [1, 1], "The shape of the array should be 1x1"
assert len(temp_range["values"]) == 1, "There should be 1 value selected"


def test_cf_position_csv(cf_client):
x = 204
Expand Down Expand Up @@ -346,7 +385,7 @@ def test_cf_multiple_position_csv(cf_client):
assert key in csv_data[0], f"column {key} should be in the header"


def test_cf_area_query(cf_client, cf_dataset):
def test_cf_area_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=cf_covjson")

Expand All @@ -372,17 +411,18 @@ def test_cf_area_query(cf_client, cf_dataset):
air_param = data["parameters"]["air"]

assert (
air_param["unit"]["label"]["en"] == cf_dataset["air"].attrs["units"]
air_param["unit"]["label"]["en"] == cf_air_dataset["air"].attrs["units"]
), "DataArray units should be set as parameter units"
assert (
air_param["observedProperty"]["id"] == cf_dataset["air"].attrs["standard_name"]
air_param["observedProperty"]["id"]
== cf_air_dataset["air"].attrs["standard_name"]
), "DataArray standard_name should be set as the observed property id"
assert (
air_param["observedProperty"]["label"]["en"]
== cf_dataset["air"].attrs["long_name"]
== cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter observed property"
assert (
air_param["description"]["en"] == cf_dataset["air"].attrs["long_name"]
air_param["description"]["en"] == cf_air_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter description"

air_range = data["ranges"]["air"]
Expand All @@ -403,7 +443,7 @@ def test_cf_area_query(cf_client, cf_dataset):
), "There should be 26 values, 9 for each time step"


def test_cf_area_csv_query(cf_client, cf_dataset):
def test_cf_area_csv_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=csv")

Expand All @@ -427,7 +467,7 @@ def test_cf_area_csv_query(cf_client, cf_dataset):
assert key in csv_data[0], f"column {key} should be in the header"


def test_cf_area_geojson_query(cf_client, cf_dataset):
def test_cf_area_geojson_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=geojson")

Expand All @@ -446,7 +486,7 @@ def test_cf_area_geojson_query(cf_client, cf_dataset):
assert len(features) == 36, "There should be 36 data points"


def test_cf_area_nc_query(cf_client, cf_dataset):
def test_cf_area_nc_query(cf_client, cf_air_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}&f=nc")

Expand Down
100 changes: 100 additions & 0 deletions tests/test_select.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@
import pandas as pd
import pytest
import xarray as xr
import xarray.testing as xrt
from shapely import MultiPoint, Point, from_wkt

from xpublish_edr.geometry.area import select_by_area
from xpublish_edr.geometry.common import project_dataset
from xpublish_edr.geometry.position import select_by_position
from xpublish_edr.query import EDRQuery

Expand All @@ -17,6 +19,14 @@ def regular_xy_dataset():
return xr.tutorial.load_dataset("air_temperature")


@pytest.fixture(scope="function")
def projected_xy_dataset():
"""Loads a sample dataset with projected X and Y coordinates"""
from cf_xarray.datasets import rotds

return rotds


def test_select_query(regular_xy_dataset):
query = EDRQuery(
coords="POINT(200 45)",
Expand Down Expand Up @@ -134,6 +144,44 @@ def test_select_position_regular_xy(regular_xy_dataset):
npt.assert_approx_equal(ds["air"][-1], 279.19), "Temperature is incorrect"


def test_select_position_projected_xy(projected_xy_dataset):
query = EDRQuery(
coords="POINT(64.59063409 66.66454929)",
crs="EPSG:4326",
)

projected_point = query.project_geometry(projected_xy_dataset)
npt.assert_approx_equal(projected_point.x, 18.045), "Longitude is incorrect"
npt.assert_approx_equal(projected_point.y, 21.725), "Latitude is incorrect"

ds = select_by_position(projected_xy_dataset, projected_point)
xrt.assert_identical(
ds,
projected_xy_dataset.sel(rlon=[18.045], rlat=[21.725], method="nearest"),
)

projected_ds = project_dataset(ds, query.crs)
(
npt.assert_approx_equal(projected_ds.longitude.values, 64.59063409),
"Longitude is incorrect",
)
(
npt.assert_approx_equal(projected_ds.latitude.values, 66.66454929),
"Latitude is incorrect",
)
(
npt.assert_approx_equal(
projected_ds.temp.values,
projected_xy_dataset.sel(
rlon=[18.045],
rlat=[21.725],
method="nearest",
).temp.values,
),
"Temperature is incorrect",
)


def test_select_position_regular_xy_interpolate(regular_xy_dataset):
point = Point((204, 44))
ds = select_by_position(regular_xy_dataset, point, method="linear")
Expand Down Expand Up @@ -170,6 +218,36 @@ def test_select_position_regular_xy_multi(regular_xy_dataset):
)


def test_select_position_projected_xy_multi(projected_xy_dataset):
query = EDRQuery(
coords="MULTIPOINT(64.3 66.6, 64.6 66.5)",
crs="EPSG:4326",
method="linear",
)

projected_points = query.project_geometry(projected_xy_dataset)
ds = select_by_position(projected_xy_dataset, projected_points, method="linear")
projected_ds = project_dataset(ds, query.crs)
assert "temp" in projected_ds, "Dataset does not contain the temp variable"
assert "rlon" not in projected_ds, "Dataset does not contain the rlon variable"
assert "rlat" not in projected_ds, "Dataset does not contain the rlat variable"
(
npt.assert_array_almost_equal(projected_ds.longitude, [64.3, 64.6]),
"Longitude is incorrect",
)
(
npt.assert_array_almost_equal(projected_ds.latitude, [66.6, 66.5]),
"Latitude is incorrect",
)
(
npt.assert_array_almost_equal(
ds.temp,
projected_ds.temp,
),
"Temperature is incorrect",
)


def test_select_position_regular_xy_multi_interpolate(regular_xy_dataset):
points = MultiPoint([(202, 45), (205, 48)])
ds = select_by_position(regular_xy_dataset, points, method="linear")
Expand Down Expand Up @@ -236,6 +314,28 @@ def test_select_area_regular_xy(regular_xy_dataset):
)


def test_select_area_projected_xy(projected_xy_dataset):
query = EDRQuery(
coords="POLYGON((64.3 66.82, 64.5 66.82, 64.5 66.6, 64.3 66.6, 64.3 66.82))",
crs="EPSG:4326",
)

projected_area = query.project_geometry(projected_xy_dataset)
ds = select_by_area(projected_xy_dataset, projected_area)
projected_ds = project_dataset(ds, query.crs)

assert projected_ds is not None, "Dataset was not returned"
assert "temp" in projected_ds, "Dataset does not contain the air variable"
assert "latitude" in projected_ds, "Dataset does not contain the latitude variable"
assert (
"longitude" in projected_ds
), "Dataset does not contain the longitude variable"

assert projected_ds.longitude.shape[0] == 1, "Longitude shape is incorrect"
assert projected_ds.latitude.shape[0] == 1, "Latitude shape is incorrect"
assert projected_ds.temp.shape[0] == 1, "Temperature shape is incorrect"


def test_select_area_regular_xy_boundary(regular_xy_dataset):
polygon = from_wkt("POLYGON((200 40, 200 50, 210 50, 210 40, 200 40))").buffer(
0.0001,
Expand Down
15 changes: 11 additions & 4 deletions xpublish_edr/geometry/area.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,16 @@ def _select_area_regular_xy_grid(
"""
# To minimize performance impact, we first subset the dataset to the bounding box of the polygon
(minx, miny, maxx, maxy) = polygon.bounds
ds = ds.cf.sel(X=slice(minx, maxx), Y=slice(maxy, miny))
indexes = ds.cf.indexes
if indexes["X"].is_monotonic_increasing:
x_sel = slice(minx, maxx)
else:
x_sel = slice(maxx, minx)
if indexes["Y"].is_monotonic_increasing:
y_sel = slice(miny, maxy)
else:
y_sel = slice(maxy, miny)
ds = ds.cf.sel(X=x_sel, Y=y_sel)

# For a regular grid, we can create a meshgrid of the X and Y coordinates to create a spatial mask
pts = np.meshgrid(ds.cf["X"], ds.cf["Y"])
Expand All @@ -45,6 +54,4 @@ def _select_area_regular_xy_grid(
y_sel = xr.Variable(data=y_inds, dims=VECTORIZED_DIM)

# Apply the mask and vectorize to a 1d collection of points
ds_sub = ds.cf.isel(X=x_sel, Y=y_sel)

return ds_sub
return ds.cf.isel(X=x_sel, Y=y_sel)
Loading

0 comments on commit 04affad

Please sign in to comment.