Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add area endpoint support #49

Merged
merged 24 commits into from
Nov 4, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
9ae30a5
Add initial pass at subsetting to polygon
mpiannucci Oct 24, 2024
3d314b3
Add test case
mpiannucci Oct 24, 2024
d34ad67
Small reorg
mpiannucci Oct 24, 2024
70489cd
Add selection tests
mpiannucci Oct 24, 2024
65b993f
Add selection tests, move param selection to sepearte function
mpiannucci Oct 24, 2024
6a75c43
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
dfae759
Fix lints
mpiannucci Oct 24, 2024
3aae7ba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
f5c5a7d
Refactor to chagne to be vectorized output
mpiannucci Oct 24, 2024
81ef0ae
Merge remote-tracking branch 'mpiannucci/matt/polygon' into matt/polygon
mpiannucci Oct 24, 2024
b0e3d0a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
f12037e
format
mpiannucci Oct 24, 2024
1a6aa03
Add buffer option for area queries
mpiannucci Oct 24, 2024
f6a89df
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
2427e91
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
812cea3
Update to use intersect_xy
mpiannucci Oct 24, 2024
f7491d4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
83458f3
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 24, 2024
29fc089
more test coverage
mpiannucci Oct 24, 2024
7080321
reorganize
mpiannucci Nov 4, 2024
154134f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 4, 2024
90aca54
Add more doc strings
mpiannucci Nov 4, 2024
cc47a51
Merge remote-tracking branch 'mpiannucci/matt/polygon' into matt/polygon
mpiannucci Nov 4, 2024
9c69cbb
Oops forgot file
mpiannucci Nov 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions tests/test_cf_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ def test_cf_position_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_area_formats(cf_client):
response = cf_client.get("/edr/area/formats")

assert response.status_code == 200, "Response did not return successfully"

data = response.json()

assert "cf_covjson" in data, "cf_covjson is not a valid format"
assert "nc" in data, "nc is not a valid format"
assert "csv" in data, "csv is not a valid format"


def test_cf_position_query(cf_client, cf_dataset):
x = 204
y = 44
Expand Down Expand Up @@ -147,3 +159,61 @@ def test_percent_encoded_cf_position_nc(cf_client):
assert (
"position.nc" in response.headers["content-disposition"]
), "The file name should be position.nc"


def test_cf_area_query(cf_client, cf_dataset):
coords = "POLYGON((201 41, 201 49, 209 49, 209 41, 201 41))"
response = cf_client.get(f"/datasets/air/edr/area?coords={coords}")

assert response.status_code == 200, "Response did not return successfully"

data = response.json()

for key in ("type", "domain", "parameters", "ranges"):
assert key in data, f"Key {key} is not a top level key in the CovJSON response"

axes = data["domain"]["axes"]

assert axes["x"] == {
"values": [202.5, 202.5, 202.5, 205.0, 205.0, 205.0, 207.5, 207.5, 207.5],
}, "Did not select nearby x coordinates within the polygon"
assert axes["y"] == {
"values": [47.5, 45.0, 42.5, 47.5, 45.0, 42.5, 47.5, 45.0, 42.5],
}, "Did not select a nearby y coordinates within the polygon"

assert (
len(axes["t"]["values"]) == 4
), "There should be a time value for each time step"

air_param = data["parameters"]["air"]

assert (
air_param["unit"]["label"]["en"] == cf_dataset["air"].attrs["units"]
), "DataArray units should be set as parameter units"
assert (
air_param["observedProperty"]["id"] == cf_dataset["air"].attrs["standard_name"]
), "DataArray standard_name should be set as the observed property id"
assert (
air_param["observedProperty"]["label"]["en"]
== cf_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter observed property"
assert (
air_param["description"]["en"] == cf_dataset["air"].attrs["long_name"]
), "DataArray long_name should be set as parameter description"

air_range = data["ranges"]["air"]

assert air_range["type"] == "NdArray", "Response range should be a NdArray"
assert air_range["dataType"] == "float", "Air dataType should be floats"
assert air_range["axisNames"] == [
"t",
"pts",
], "Time should be the only remaining axes"
assert len(air_range["shape"]) == 2, "There should be 2 axes"
assert air_range["shape"][0] == len(axes["t"]["values"]), "The shape of the "
assert air_range["shape"][1] == len(
axes["x"]["values"],
), "The shape of the pts axis"
assert (
len(air_range["values"]) == 36
), "There should be 26 values, 9 for each time step"
114 changes: 114 additions & 0 deletions tests/test_select.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import cf_xarray # noqa
import numpy as np
import numpy.testing as npt
import pandas as pd
import pytest
import xarray as xr
from shapely import Point, from_wkt

from xpublish_edr.query import EDRQuery
from xpublish_edr.select import select_area, select_postition, select_query


@pytest.fixture(scope="function")
def regular_xy_dataset():
"""Loads a sample dataset with regular X and Y coordinates"""
return xr.tutorial.load_dataset("air_temperature")


def test_select_query(regular_xy_dataset):
query = EDRQuery(
coords="POINT(200 45)",
datetime="2013-01-01T06:00:00",
parameters="air,time",
)
query_params = {}

ds = select_query(regular_xy_dataset, query, query_params)

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"
assert "time" in ds, "Dataset does not contain the time variable"

assert ds["time"] == pd.to_datetime(
"2013-01-01T06:00:00",
), "Dataset shape is incorrect"
assert ds["air"].shape == (25, 53), "Dataset shape is incorrect"


def test_select_position_regular_xy(regular_xy_dataset):
point = Point((204, 44))
ds = select_postition(regular_xy_dataset, point)

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"

assert ds["air"].shape == ds["time"].shape, "Dataset shape is incorrect"
npt.assert_array_equal(ds["lat"], 45.0), "Latitude is incorrect"
npt.assert_array_equal(ds["lon"], 205.0), "Longitude is incorrect"
npt.assert_approx_equal(ds["air"][0], 280.2), "Temperature is incorrect"
npt.assert_approx_equal(ds["air"][-1], 279.19), "Temperature is incorrect"


def test_select_area_regular_xy(regular_xy_dataset):
polygon = Point(204, 44).buffer(5)
ds = select_area(regular_xy_dataset, polygon)

assert ds is not None, "Dataset was not returned"
assert "air" in ds, "Dataset does not contain the air variable"
assert "lat" in ds, "Dataset does not contain the lat variable"
assert "lon" in ds, "Dataset does not contain the lon variable"

assert ds["air"].shape == (2920, 13), "Dataset shape is incorrect"
assert ds["lat"].shape == (13,), "Latitude shape is incorrect"
assert ds["lon"].shape == (13,), "Longitude shape is incorrect"

print(ds["air"].isel(time=0).values)

(
npt.assert_array_equal(np.unique(ds["lat"]), [40.0, 42.5, 45.0, 47.5]),
"Latitude is incorrect",
)
(
npt.assert_array_equal(np.unique(ds["lon"]), [200.0, 202.5, 205.0, 207.5]),
"Longitude is incorrect",
)
(
npt.assert_array_almost_equal(
ds["air"].isel(time=0),
np.array(
[
280.0,
282.79,
284.6,
279.0,
280.7,
283.2,
284.9,
279.0,
280.2,
282.6,
284.2,
279.6,
281.9,
],
),
),
"Temperature is incorrect",
)


def test_select_area_regular_xy_boundary(regular_xy_dataset):
polygon = from_wkt("POLYGON((200 40, 200 50, 210 50, 210 40, 200 40))").buffer(
0.0001,
)
ds = select_area(regular_xy_dataset, polygon)

assert ds["lat"].min() == 40.0, "Latitude is incorrect"
assert ds["lat"].max() == 50.0, "Latitude is incorrect"
assert ds["lon"].min() == 200.0, "Longitude is incorrect"
assert ds["lon"].max() == 210.0, "Longitude is incorrect"
108 changes: 64 additions & 44 deletions xpublish_edr/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,16 @@

import importlib
import logging
from typing import List, Optional
from typing import List

import xarray as xr
from fastapi import APIRouter, Depends, HTTPException, Request
from xpublish import Dependencies, Plugin, hookimpl

from xpublish_edr.select import select_area, select_postition, select_query

from .formats.to_covjson import to_cf_covjson
from .query import EDRQuery, edr_query, edr_query_params
from .query import EDRQuery, edr_query

logger = logging.getLogger("cf_edr")

Expand Down Expand Up @@ -60,6 +62,18 @@ def get_position_formats():

return formats

@router.get(
"/area/formats",
summary="Area query response formats",
)
def get_area_formats():
"""
Returns the various supported formats for area queries
"""
formats = {key: value.__doc__ for key, value in position_formats().items()}

return formats

return router

@hookimpl
Expand All @@ -79,65 +93,71 @@ def get_position(
Extra selecting/slicing parameters can be provided as extra query parameters
"""
try:
ds = dataset.cf.sel(X=query.point.x, Y=query.point.y, method="nearest")
ds = select_postition(dataset, query.geometry)
mpiannucci marked this conversation as resolved.
Show resolved Hide resolved
except KeyError:
raise HTTPException(
status_code=404,
detail="Dataset does not have CF Convention compliant metadata",
)

if query.z:
ds = dataset.cf.sel(Z=query.z, method="nearest")
logger.debug(
f"Dataset filtered by position ({query.geometry.x}, {query.geometry.y}): {ds}",
)

if query.datetime:
datetimes = query.datetime.split("/")
try:
ds = select_query(ds, query, dict(request.query_params))
mpiannucci marked this conversation as resolved.
Show resolved Hide resolved
except ValueError as e:
raise HTTPException(
status_code=404,
detail=f"Error selecting from query: {e.args[0]}",
)

try:
if len(datetimes) == 1:
ds = ds.cf.sel(T=datetimes[0], method="nearest")
elif len(datetimes) == 2:
ds = ds.cf.sel(T=slice(datetimes[0], datetimes[1]))
else:
raise HTTPException(
status_code=404,
detail="Invalid datetimes submitted",
)
except ValueError as e:
logger.error("Error with datetime", exc_info=True)
raise HTTPException(
status_code=404,
detail=f"Invalid datetime ({e})",
) from e
logger.debug(f"Dataset filtered by query params {ds}")

if query.parameters:
if query.format:
try:
ds = ds.cf[query.parameters.split(",")]
except KeyError as e:
format_fn = position_formats()[query.format]
except KeyError:
raise HTTPException(
status_code=404,
detail=f"Invalid variable: {e}",
404,
f"{query.format} is not a valid format for EDR position queries. "
"Get `./formats` for valid formats",
)

logger.debug(f"Dataset filtered by query params {ds}")
return format_fn(ds)

query_params = dict(request.query_params)
for query_param in request.query_params:
if query_param in edr_query_params:
del query_params[query_param]
return to_cf_covjson(ds)

method: Optional[str] = "nearest"
@router.get("/area", summary="Area query")
def get_area(
request: Request,
query: EDRQuery = Depends(edr_query),
dataset: xr.Dataset = Depends(deps.dataset),
):
"""
Returns area data based on WKT `Polygon(lon lat)` coordinates

for key, value in query_params.items():
split_value = value.split("/")
if len(split_value) == 1:
continue
elif len(split_value) == 2:
query_params[key] = slice(split_value[0], split_value[1])
method = None
else:
raise HTTPException(404, f"Too many values for selecting {key}")
Extra selecting/slicing parameters can be provided as extra query parameters
"""
try:
ds = select_area(dataset, query.geometry)
mpiannucci marked this conversation as resolved.
Show resolved Hide resolved
except KeyError:
raise HTTPException(
status_code=404,
detail="Dataset does not have CF Convention compliant metadata",
)

logger.debug(f"Dataset filtered by polygon {query.geometry.boundary}: {ds}")

try:
ds = select_query(ds, query, dict(request.query_params))
except ValueError as e:
raise HTTPException(
status_code=404,
detail=f"Error selecting from query: {e.args[0]}",
)

ds = ds.sel(query_params, method=method)
logger.debug(f"Dataset filtered by query params {ds}")

if query.format:
try:
Expand Down
2 changes: 1 addition & 1 deletion xpublish_edr/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class EDRQuery(BaseModel):
format: Optional[str] = None

@property
def point(self):
def geometry(self):
"""Shapely point from WKT query params"""
return wkt.loads(self.coords)

Expand Down
Loading
Loading