Collection Metadata #62

Merged · 15 commits · Nov 21, 2024
4 changes: 2 additions & 2 deletions README.md
@@ -55,9 +55,9 @@ This package attempts to follow [the spec](https://docs.ogc.org/is/19-086r6/19-0

### [collections](https://docs.ogc.org/is/19-086r6/19-086r6.html#_e55ba0f5-8f24-4f1b-a7e3-45775e39ef2e) and Resource Paths Support

`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead the path resource appears as `/{dataset_id}/query`. This is because of the path structure of xpublish.
`xpublish-edr` does not currently support the `/collections/{collectionId}/query` path template described in the spec. Instead, the path resource appears as `/{dataset_id}/edr/{query}` because of xpublish's path structure. In the future, if `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html), it could provide a path to supporting the spec-compliant `collections` resource path.

In the future, when `xpublish` supports [`DataTree`](https://docs.xarray.dev/en/stable/generated/xarray.DataTree.html) it will provide a path to supporting the spec compliant `collections` resource path.
However, even though the collections resource does not exist, this implementation supports [collection metadata](https://docs.ogc.org/is/19-086r6/19-086r6.html#_5d07dde9-231a-4652-a1f3-dd036c337bdc) at the dataset level through the `/{dataset_id}/edr/` resource.
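
As a minimal sketch (not part of this change), the dataset-level collection metadata can be fetched with any HTTP client; the dataset id `air` and the host/port below are illustrative placeholders:

```python
import requests  # any HTTP client works; requests is used here for brevity

# Dataset id ("air") and host/port are illustrative placeholders
base_url = "http://localhost:9000/datasets/air"

response = requests.get(f"{base_url}/edr/")
response.raise_for_status()
metadata = response.json()

# A few of the fields advertised by the collection metadata response
print(metadata["id"])                         # dataset id, e.g. "air"
print(metadata["extent"]["spatial"]["bbox"])  # [[min_lon, min_lat, max_lon, max_lat]]
print(metadata["output_formats"])             # e.g. ["cf_covjson", "csv", ...]
```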

### Supported Queries

69 changes: 61 additions & 8 deletions tests/test_cf_router.py
@@ -62,6 +62,53 @@ def test_cf_area_formats(cf_client):
assert "csv" in data, "csv is not a valid format"


def test_cf_metadata_query(cf_client):
response = cf_client.get("/datasets/air/edr/")
assert response.status_code == 200, "Response did not return successfully"
data = response.json()

assert data["id"] == "air", "The id should be air"
assert data["title"] == "4x daily NMC reanalysis (1948)", "The title is incorrect"
assert (
data["description"]
== "Data is from NMC initialized reanalysis\n(4x/day). These are the 0.9950 sigma level values."
), "The description is incorrect"
assert data["crs"] == ["EPSG:4326"], "The crs is incorrect"
assert set(data["output_formats"]) == {
"cf_covjson",
"nc",
"netcdf4",
"nc4",
"netcdf",
"csv",
"geojson",
}, "The output formats are incorrect"
assert (
"position" in data["data_queries"] and "area" in data["data_queries"]
), "The data queries are incorrect"

assert (
"temporal" in data["extent"] and "spatial" in data["extent"]
), "Temporal and spatial extents should be present in extent"
assert (
"vertical" not in data["extent"]
), "Vertical extent should not be present in extent"

assert data["extent"]["temporal"]["interval"] == [
"2013-01-01T00:00:00",
"2013-01-01T18:00:00",
], "Temporal interval is incorrect"
assert (
data["extent"]["temporal"]["values"][0]
== "2013-01-01T00:00:00/2013-01-01T18:00:00"
), "Temporal values are incorrect"

assert data["extent"]["spatial"]["bbox"] == [
[200.0, 15.0, 322.5, 75.0],
], "Spatial bbox is incorrect"
assert data["extent"]["spatial"]["crs"] == "EPSG:4326", "Spatial CRS is incorrect"


def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):
x = 204
y = 44
@@ -122,14 +169,20 @@ def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset):

axes = data["domain"]["axes"]

npt.assert_array_almost_equal(
axes["x"]["values"],
[[x]],
), "Did not select nearby x coordinate"
npt.assert_array_almost_equal(
axes["y"]["values"],
[[y]],
), "Did not select a nearby y coordinate"
npt.assert_array_almost_equal(
axes["x"]["values"],
[[x]],
err_msg="Did not select nearby x coordinate",
)
npt.assert_array_almost_equal(
axes["y"]["values"],
[[y]],
err_msg="Did not select a nearby y coordinate",
)

temp_range = data["ranges"]["temp"]
assert temp_range["type"] == "NdArray", "Response range should be a NdArray"
3 changes: 3 additions & 0 deletions xpublish_edr/geometry/common.py
@@ -16,6 +16,9 @@
transformer_from_crs = lru_cache(pyproj.Transformer.from_crs)


# WGS84 lat/lon (EPSG:4326); always included in the advertised CRS list
DEFAULT_CRS = pyproj.CRS.from_epsg(4326)


def coord_is_regular(da: xr.DataArray) -> bool:
"""
Check if the DataArray has a regular grid
200 changes: 194 additions & 6 deletions xpublish_edr/plugin.py
@@ -11,13 +11,13 @@

from xpublish_edr.formats.to_covjson import to_cf_covjson
from xpublish_edr.geometry.area import select_by_area
from xpublish_edr.geometry.common import project_dataset
from xpublish_edr.geometry.common import DEFAULT_CRS, dataset_crs, project_dataset
from xpublish_edr.geometry.position import select_by_position
from xpublish_edr.logger import logger
from xpublish_edr.query import EDRQuery, edr_query


def position_formats():
def output_formats():
"""
Return response format functions from registered
`xpublish_edr_position_formats` entry_points
@@ -31,6 +31,29 @@ def position_formats():
return formats


def variable_description(variable: xr.DataArray):
"""
Return CF version of EDR Parameter metadata for a given xarray variable
"""
name = variable.attrs.get("name", None)
standard_name = variable.attrs.get("standard_name", name if name else "")
label = standard_name if not name else name
long_name = variable.attrs.get("long_name", "")
units = variable.attrs.get("units", "")
return {
"type": "Parameter",
"description": long_name,
"unit": {
"label": units,
},
"observedProperty": {
"label": label,
"standard_name": standard_name,
"long_name": long_name,
},
}
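# Illustrative example (not part of this diff): for a variable with attrs
# {"standard_name": "air_temperature", "long_name": "Air Temperature", "units": "degK"}
# and no "name" attribute, variable_description would return:
# {
#     "type": "Parameter",
#     "description": "Air Temperature",
#     "unit": {"label": "degK"},
#     "observedProperty": {
#         "label": "air_temperature",
#         "standard_name": "air_temperature",
#         "long_name": "Air Temperature",
#     },
# }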


class CfEdrPlugin(Plugin):
"""
OGC EDR compatible endpoints for Xpublish datasets
@@ -57,7 +80,7 @@ def get_position_formats():
"""
Returns the various supported formats for position queries
"""
formats = {key: value.__doc__ for key, value in position_formats().items()}
formats = {key: value.__doc__ for key, value in output_formats().items()}

return formats

@@ -69,7 +92,7 @@ def get_area_formats():
"""
Returns the various supported formats for area queries
"""
formats = {key: value.__doc__ for key, value in position_formats().items()}
formats = {key: value.__doc__ for key, value in output_formats().items()}

return formats

@@ -80,6 +103,171 @@ def dataset_router(self, deps: Dependencies):
"""Register dataset level router for EDR endpoints"""
router = APIRouter(prefix=self.app_router_prefix, tags=self.dataset_router_tags)

@router.get("/", summary="Collection metadata")
def get_collection_metadata(dataset: xr.Dataset = Depends(deps.dataset)):
"""
Returns the collection metadata for the dataset

There is no nested hierarchy in our router right now, so instead we return the metadata
for the current dataset as a single collection. See the spec for more information:
https://docs.ogc.org/is/19-086r6/19-086r6.html#_162817c2-ccd7-43c9-b1ea-ad3aea1b4d6b
"""
id = dataset.attrs.get("_xpublish_id", "unknown")
title = dataset.attrs.get("title", "unknown")
description = dataset.attrs.get("description", "no description")

crs = dataset_crs(dataset)

available_output_formats = list(output_formats().keys())

ds_cf = dataset.cf
axes = ds_cf.axes

# We will use the dataset's CRS as the default CRS for the extents,
# but override when it makes sense.
extent_crs = crs

if len(axes["X"]) > 1:
if "latitude" and "longitude" in ds_cf:
min_lon = float(ds_cf["longitude"].min().values)
max_lon = float(ds_cf["longitude"].max().values)
min_lat = float(ds_cf["latitude"].min().values)
max_lat = float(ds_cf["latitude"].max().values)

# When we are explicitly using latitude and longitude, we should use WGS84
extent_crs = DEFAULT_CRS
else:
raise HTTPException(
status_code=404,
detail="Dataset does not have EDR compliant metadata: Multiple X axes found",
)
else:
min_lon = float(ds_cf["X"].min().values)
max_lon = float(ds_cf["X"].max().values)
min_lat = float(ds_cf["Y"].min().values)
max_lat = float(ds_cf["Y"].max().values)

extents: dict = {
"spatial": {
"bbox": [
[
min_lon,
min_lat,
max_lon,
max_lat,
],
],
"crs": extent_crs.to_string(),
},
}

if "T" in ds_cf:
time_min = ds_cf["T"].min().dt.strftime("%Y-%m-%dT%H:%M:%S").values
time_max = ds_cf["T"].max().dt.strftime("%Y-%m-%dT%H:%M:%S").values

extents["temporal"] = {
"interval": [
str(time_min),
str(time_max),
],
"values": [
f"{time_min}/{time_max}",
],
"trs": 'TIMECRS["DateTime",TDATUM["Gregorian Calendar"],CS[TemporalDateTime,1],AXIS["Time (T)",unspecified]]', # noqa
}

if "Z" in ds_cf:
units = ds_cf["Z"].attrs.get("units", "unknown")
positive = ds_cf["Z"].attrs.get("positive", "up")
elevations = ds_cf["Z"].values
min_z = elevations.min()
max_z = elevations.max()
elevation_values = ",".join([str(e) for e in elevations])

extents["vertical"] = {
"interval": [
min_z,
max_z,
],
"values": elevation_values,
"vrs": f"VERTCRS[VERT_CS['unknown'],AXIS['Z',{positive}],UNIT[{units},1]]", # noqa
"positive": positive,
"units": units,
}

parameters = {
k: variable_description(v)
for k, v in dataset.variables.items()
if "axis" not in v.attrs
}

crs_details = [
{
"crs": crs.to_string(),
"wkt": crs.to_wkt(),
},
]

# 4326 is always available
if crs != DEFAULT_CRS:
crs_details.append(
{
"crs": DEFAULT_CRS.to_string(),
"wkt": DEFAULT_CRS.to_wkt(),
},
)

return {
"id": id,
"title": title,
"description": description,
"links": [],
"extent": extents,
"data_queries": {
"position": {
"href": "/edr/position?coords={coords}",
"hreflang": "en",
"rel": "data",
"templated": True,
"variables": {
"title": "Position query",
"description": "Returns position data based on WKT `POINT(lon lat)` or `MULTIPOINT(lon lat, ...)` coordinates", # noqa
"query_type": "position",
"coords": {
"type": "string",
"description": "WKT `POINT(lon lat)` or `MULTIPOINT(lon lat, ...)` coordinates", # noqa
"required": True,
},
"output_format": available_output_formats,
"default_output_format": "cf_covjson",
"crs_details": crs_details,
},
},
"area": {
"href": "/edr/area",
"hreflang": "en",
"rel": "data",
"templated": True,
"variables": {
"title": "Area query",
"description": "Returns data in a polygon based on WKT `POLYGON(lon lat, ...)` coordinates", # noqa
"query_type": "position",
"coords": {
"type": "string",
"description": "WKT `POLYGON(lon lat, ...)` coordinates",
"required": True,
},
"output_format": available_output_formats,
"default_output_format": "cf_covjson",
"crs_details": crs_details,
},
},
},
"crs": [crs.to_string()],
"output_formats": available_output_formats,
"parameter_names": parameters,
}
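# Illustrative only (not part of this diff): expanding the templated "position"
# link above against the full dataset route gives, for example,
#   GET /datasets/air/edr/position?coords=POINT(204 44)
# where the dataset id and coordinates are placeholder values drawn from
# tests/test_cf_router.py.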

@router.get("/position", summary="Position query")
def get_position(
request: Request,
@@ -126,7 +314,7 @@ def get_position(

if query.format:
try:
format_fn = position_formats()[query.format]
format_fn = output_formats()[query.format]
except KeyError:
raise HTTPException(
404,
Expand Down Expand Up @@ -182,7 +370,7 @@ def get_area(

if query.format:
try:
format_fn = position_formats()[query.format]
format_fn = output_formats()[query.format]
except KeyError:
raise HTTPException(
404,