Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add area endpoint support #49

Merged
merged 24 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9ae30a5
Add initial pass at subsetting to polygon
mpiannucci Oct 24, 2024
3d314b3
Add test case
mpiannucci Oct 24, 2024
d34ad67
Small reorg
mpiannucci Oct 24, 2024
70489cd
Add selection tests
mpiannucci Oct 24, 2024
65b993f
Add selection tests, move param selection to sepearte function
mpiannucci Oct 24, 2024
6a75c43
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
dfae759
Fix lints
mpiannucci Oct 24, 2024
3aae7ba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
f5c5a7d
Refactor to chagne to be vectorized output
mpiannucci Oct 24, 2024
81ef0ae
Merge remote-tracking branch 'mpiannucci/matt/polygon' into matt/polygon
mpiannucci Oct 24, 2024
b0e3d0a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
f12037e
format
mpiannucci Oct 24, 2024
1a6aa03
Add buffer option for area queries
mpiannucci Oct 24, 2024
f6a89df
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
2427e91
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
812cea3
Update to use intersect_xy
mpiannucci Oct 24, 2024
f7491d4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
83458f3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
29fc089
more test coverage
mpiannucci Oct 24, 2024
7080321
reorganize
mpiannucci Nov 4, 2024
154134f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 4, 2024
90aca54
Add more doc strings
mpiannucci Nov 4, 2024
cc47a51
Merge remote-tracking branch 'mpiannucci/matt/polygon' into matt/polygon
mpiannucci Nov 4, 2024
9c69cbb
Oops forgot file
mpiannucci Nov 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions tests/test_cf_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ def test_cf_position_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_area_formats(cf_client):
response = cf_client.get("/edr/area/formats")

assert response.status_code == 200, "Response did not return successfully"

data = response.json()

assert "cf_covjson" in data, "cf_covjson is not a valid format"
assert "nc" in data, "nc is not a valid format"
assert "csv" in data, "csv is not a valid format"


def test_cf_position_query(cf_client, cf_dataset):
x = 204
y = 44
Expand Down Expand Up @@ -147,3 +159,61 @@ def test_percent_encoded_cf_position_nc(cf_client):
assert (
"position.nc" in response.headers["content-disposition"]
), "The file name should be position.nc"


def test_cf_area_query(cf_client, cf_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))&f=cf_covjson"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}")

assert response.status_code == 200, "Response did not return successfully"

data = response.json()

for key in ("type", "domain", "parameters", "ranges"):
assert key in data, f"Key {key} is not a top level key in the CovJSON response"

axes = data["domain"]["axes"]

assert axes["x"] == {
"values": [202.5, 202.5, 202.5, 205.0, 205.0, 205.0, 207.5, 207.5, 207.5],
}, "Did not select nearby x coordinates within the polygon"
assert axes["y"] == {
"values": [47.5, 45.0, 42.5, 47.5, 45.0, 42.5, 47.5, 45.0, 42.5],
}, "Did not select a nearby y coordinates within the polygon"

assert (
len(axes["t"]["values"]) == 4
), "There should be a time value for each time step"

air_param = data["parameters"]["air"]

assert (
air_param["unit"]["label"]["en"] == cf_dataset["air"].attrs["units"]
), "DataArray units should be set as parameter units"
assert (
air_param["observedProperty"]["id"] == cf_dataset["air"].attrs["standard_name"]
), "DataArray standard_name should be set as the observed property id"
assert (
air_param["observedProperty"]["label"]["en"]
== cf_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter observed property"
assert (
air_param["description"]["en"] == cf_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter description"

air_range = data["ranges"]["air"]

assert air_range["type"] == "NdArray", "Response range should be a NdArray"
assert air_range["dataType"] == "float", "Air dataType should be floats"
assert air_range["axisNames"] == [
"t",
"pts",
], "Time should be the only remaining axes"
assert len(air_range["shape"]) == 2, "There should be 2 axes"
assert air_range["shape"][0] == len(axes["t"]["values"]), "The shape of the "
assert air_range["shape"][1] == len(
axes["x"]["values"],
), "The shape of the pts axis"
assert (
len(air_range["values"]) == 36
), "There should be 26 values, 9 for each time step"
165 changes: 165 additions & 0 deletions tests/test_select.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import cf_xarray # noqa
import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
import xarray as xr
from shapely import Point, from_wkt

from xpublish_edr.geometry.area import select_by_area
from xpublish_edr.geometry.position import select_by_postition
from xpublish_edr.query import EDRQuery


@pytest.fixture(scope="function")
def regular_xy_dataset():
"""Loads a sample dataset with regular X and Y coordinates"""
return xr.tutorial.load_dataset("air_temperature")


def test_select_query(regular_xy_dataset):
query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00",
parameters="air,time",
)
query_params = {}

ds = query.select(regular_xy_dataset, query_params)

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"
assert "time" in ds, "Dataset does not contain the time variable"

assert ds["time"] == pd.to_datetime(
"2013-01-01T06:00:00",
), "Dataset shape is incorrect"
assert ds["air"].shape == (25, 53), "Dataset shape is incorrect"

query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00/2013-01-01T12:00:00",
parameters="air,time",
)

ds = query.select(regular_xy_dataset, query_params)
(
npt.assert_array_equal(
ds["time"],
np.array(
["2013-01-01T06:00:00.000000000", "2013-01-01T12:00:00.000000000"],
dtype="datetime64[ns]",
),
),
"Dataset shape is incorrect",
)
assert ds["air"].shape == (2, 25, 53), "Dataset shape is incorrect"


def test_select_query_error(regular_xy_dataset):
query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00",
parameters="water",
)
query_params = {"foo": "bar"}

with pytest.raises(ValueError):
query.select(regular_xy_dataset, query_params)

query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-0 06:00",
parameters="air",
)

with pytest.raises(TypeError):
query.select(regular_xy_dataset, {})

query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00",
parameters="air",
z="100",
)

with pytest.raises(KeyError):
query.select(regular_xy_dataset, {})


def test_select_position_regular_xy(regular_xy_dataset):
point = Point((204, 44))
ds = select_by_postition(regular_xy_dataset, point)

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"

assert ds["air"].shape == ds["time"].shape, "Dataset shape is incorrect"
npt.assert_array_equal(ds["lat"], 45.0), "Latitude is incorrect"
npt.assert_array_equal(ds["lon"], 205.0), "Longitude is incorrect"
npt.assert_approx_equal(ds["air"][0], 280.2), "Temperature is incorrect"
npt.assert_approx_equal(ds["air"][-1], 279.19), "Temperature is incorrect"


def test_select_area_regular_xy(regular_xy_dataset):
polygon = Point(204, 44).buffer(5)
ds = select_by_area(regular_xy_dataset, polygon)

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"

assert ds["air"].shape == (2920, 13), "Dataset shape is incorrect"
assert ds["lat"].shape == (13,), "Latitude shape is incorrect"
assert ds["lon"].shape == (13,), "Longitude shape is incorrect"

print(ds["air"].isel(time=0).values)

(
npt.assert_array_equal(np.unique(ds["lat"]), [40.0, 42.5, 45.0, 47.5]),
"Latitude is incorrect",
)
(
npt.assert_array_equal(np.unique(ds["lon"]), [200.0, 202.5, 205.0, 207.5]),
"Longitude is incorrect",
)
(
npt.assert_array_almost_equal(
ds["air"].isel(time=0),
np.array(
[
280.0,
282.79,
284.6,
279.0,
280.7,
283.2,
284.9,
279.0,
280.2,
282.6,
284.2,
279.6,
281.9,
],
),
),
"Temperature is incorrect",
)


def test_select_area_regular_xy_boundary(regular_xy_dataset):
polygon = from_wkt("POLYGON((200 40, 200 50, 210 50, 210 40, 200 40))").buffer(
0.0001,
)
ds = select_by_area(regular_xy_dataset, polygon)

assert ds["lat"].min() == 40.0, "Latitude is incorrect"
assert ds["lat"].max() == 50.0, "Latitude is incorrect"
assert ds["lon"].min() == 200.0, "Longitude is incorrect"
assert ds["lon"].max() == 210.0, "Longitude is incorrect"
3 changes: 3 additions & 0 deletions xpublish_edr/geometry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
Package for geometry related functionality
"""
49 changes: 49 additions & 0 deletions xpublish_edr/geometry/area.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Handle selection and formatting for area queries
"""

import numpy as np
import shapely
import xarray as xr

from xpublish_edr.geometry.common import is_regular_xy_coords


def select_by_area(
ds: xr.Dataset,
polygon: shapely.Polygon,
) -> xr.Dataset:
"""
Return a dataset with the area within the given polygon
"""
if not is_regular_xy_coords(ds):
# TODO: Handle 2D coordinates
raise NotImplementedError("Only 1D coordinates are supported")
return _select_area_regular_xy_grid(ds, polygon)


def _select_area_regular_xy_grid(
ds: xr.Dataset,
polygon: shapely.Polygon,
) -> xr.Dataset:
"""
Return a dataset with the area within the given polygon
"""
# To minimize performance impact, we first subset the dataset to the bounding box of the polygon
(minx, miny, maxx, maxy) = polygon.bounds
ds = ds.cf.sel(X=slice(minx, maxx), Y=slice(maxy, miny))

# For a regular grid, we can create a meshgrid of the X and Y coordinates to create a spatial mask
pts = np.meshgrid(ds.cf["X"], ds.cf["Y"])

# Create a mask of the points within the polygon
mask = shapely.intersects_xy(polygon, pts[0], pts[1])

# Find the x and y indices that have any points within the polygon
x_inds, y_inds = np.nonzero(mask)
x_sel = xr.Variable(data=x_inds, dims="pts")
y_sel = xr.Variable(data=y_inds, dims="pts")

# Apply the mask and vectorize to a 1d collection of points
ds_sub = ds.cf.isel(X=x_sel, Y=y_sel)
return ds_sub
19 changes: 19 additions & 0 deletions xpublish_edr/geometry/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
Common geometry handling functions
"""

import xarray as xr


def coord_is_regular(da: xr.DataArray) -> bool:
"""
Check if the DataArray has a regular grid
"""
return len(da.shape) == 1 and da.name in da.dims


def is_regular_xy_coords(ds: xr.Dataset) -> bool:
"""
Check if the dataset has 2D coordinates
"""
return coord_is_regular(ds.cf["X"]) and coord_is_regular(ds.cf["Y"])
30 changes: 30 additions & 0 deletions xpublish_edr/geometry/position.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
Handle selection and formatting for position queries
"""

import shapely
import xarray as xr

from xpublish_edr.geometry.common import is_regular_xy_coords


def select_by_postition(ds: xr.Dataset, point: shapely.Point) -> xr.Dataset:
"""
Return a dataset with the position nearest to the given coordinates
"""
if not is_regular_xy_coords(ds):
# TODO: Handle 2D coordinates
raise NotImplementedError("Only 1D coordinates are supported")

return _select_by_position_regular_xy_grid(ds, point)


def _select_by_position_regular_xy_grid(
ds: xr.Dataset,
point: shapely.Point,
) -> xr.Dataset:
"""
Return a dataset with the position nearest to the given coordinates
"""
# Find the nearest X and Y coordinates to the point
return ds.cf.sel(X=point.x, Y=point.y, method="nearest")
8 changes: 8 additions & 0 deletions xpublish_edr/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""
Logging setup and shared logger for the cf_edr package.
"""

import logging

# Set up a shared logger for the cf_edr package
logger = logging.getLogger("cf_edr")
Loading
Loading