From e3dc4a17f802a8f1562140249e8589a3d8ee77aa Mon Sep 17 00:00:00 2001 From: Matthew Iannucci Date: Wed, 20 Nov 2024 15:33:21 -0500 Subject: [PATCH] More accurate to the spec version of the metadata --- tests/test_cf_router.py | 21 ++++++++ xpublish_edr/geometry/common.py | 3 ++ xpublish_edr/plugin.py | 95 +++++++++++++++++++++------------ 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/tests/test_cf_router.py b/tests/test_cf_router.py index e8b0dd6..67d2178 100644 --- a/tests/test_cf_router.py +++ b/tests/test_cf_router.py @@ -87,6 +87,27 @@ def test_cf_metadata_query(cf_client): "position" in data["data_queries"] and "area" in data["data_queries"] ), "The data queries are incorrect" + assert ( + "temporal" and "spatial" in data["extent"] + ), "Temporal and spatial extents should be present in extent" + assert ( + "vertical" not in data["extent"] + ), "Vertical extent should not be present in extent" + + assert data["extent"]["temporal"]["interval"] == [ + "2013-01-01T00:00:00", + "2013-01-01T18:00:00", + ], "Temporal interval is incorrect" + assert ( + data["extent"]["temporal"]["values"][0] + == "2013-01-01T00:00:00/2013-01-01T18:00:00" + ), "Temporal values are incorrect" + + assert data["extent"]["spatial"]["bbox"] == [ + [200.0, 15.0, 322.5, 75.0], + ], "Spatial bbox is incorrect" + assert data["extent"]["spatial"]["crs"] == "EPSG:4326", "Spatial CRS is incorrect" + def test_cf_position_query(cf_client, cf_air_dataset, cf_temp_dataset): x = 204 diff --git a/xpublish_edr/geometry/common.py b/xpublish_edr/geometry/common.py index bd2e00c..6c944e6 100644 --- a/xpublish_edr/geometry/common.py +++ b/xpublish_edr/geometry/common.py @@ -16,6 +16,9 @@ transformer_from_crs = lru_cache(pyproj.Transformer.from_crs) +DEFAULT_CRS = pyproj.CRS.from_epsg(4326) + + def coord_is_regular(da: xr.DataArray) -> bool: """ Check if the DataArray has a regular grid diff --git a/xpublish_edr/plugin.py b/xpublish_edr/plugin.py index 74b48f2..8563593 100644 --- a/xpublish_edr/plugin.py +++ b/xpublish_edr/plugin.py @@ -11,7 +11,7 @@ from xpublish_edr.formats.to_covjson import to_cf_covjson from xpublish_edr.geometry.area import select_by_area -from xpublish_edr.geometry.common import dataset_crs, project_dataset +from xpublish_edr.geometry.common import DEFAULT_CRS, dataset_crs, project_dataset from xpublish_edr.geometry.position import select_by_position from xpublish_edr.logger import logger from xpublish_edr.query import EDRQuery, edr_query @@ -120,29 +120,45 @@ def get_collection_metadata(dataset: xr.Dataset = Depends(deps.dataset)): available_output_formats = list(output_formats().keys()) - extents = {} - ds_cf = dataset.cf - if "longitude" and "latitude" in ds_cf: - min_lon = float(ds_cf["longitude"].min().values) - max_lon = float(ds_cf["longitude"].max().values) - min_lat = float(ds_cf["latitude"].min().values) - max_lat = float(ds_cf["latitude"].max().values) + axes = ds_cf.axes + + # We will use the dataset's CRS as the default CRS for the extents, + # but override when it makes sense. + extent_crs = crs + + if len(axes["X"]) > 1: + if "latitude" and "longitude" in ds_cf: + min_lon = float(ds_cf["longitude"].min().values) + max_lon = float(ds_cf["longitude"].max().values) + min_lat = float(ds_cf["latitude"].min().values) + max_lat = float(ds_cf["latitude"].max().values) + + # When we are explicitly using latitude and longitude, we should use WGS84 + extent_crs = DEFAULT_CRS + else: + raise HTTPException( + status_code=404, + detail="Dataset does not have EDR compliant metadata: Multiple X axes found", + ) else: min_lon = float(ds_cf["X"].min().values) max_lon = float(ds_cf["X"].max().values) min_lat = float(ds_cf["Y"].min().values) max_lat = float(ds_cf["Y"].max().values) - extents["spatial"] = { - "bbox": [ - [ - min_lon, - min_lat, - max_lon, - max_lat, + extents: dict = { + "spatial": { + "bbox": [ + [ + min_lon, + min_lat, + max_lon, + max_lat, + ], ], - ], + "crs": extent_crs.to_string(), + }, } if "T" in ds_cf: @@ -157,21 +173,25 @@ def get_collection_metadata(dataset: xr.Dataset = Depends(deps.dataset)): "values": [ f"{time_min}/{time_max}", ], + "trs": 'TIMECRS["DateTime",TDATUM["Gregorian Calendar"],CS[TemporalDateTime,1],AXIS["Time (T)",unspecified]]', # noqa } if "Z" in ds_cf: - min_z = ds_cf["Z"].min() - max_z = ds_cf["Z"].max() units = ds_cf["Z"].attrs.get("units", "unknown") + positive = ds_cf["Z"].attrs.get("positive", "up") + elevations = ds_cf["Z"].values + min_z = elevations.min() + max_z = elevations.max() + elevation_values = ",".join([str(e) for e in elevations]) extents["vertical"] = { "interval": [ min_z, max_z, ], - "values": [ - f"{min_z}/{max_z}", - ], + "values": elevation_values, + "vrs": f"VERTCRS[VERT_CS['unknown'],AXIS['Z',{positive}],UNIT[{units},1]]", # noqa + "positive": positive, "units": units, } @@ -181,16 +201,31 @@ def get_collection_metadata(dataset: xr.Dataset = Depends(deps.dataset)): if "axis" not in v.attrs } + crs_details = [ + { + "crs": crs.to_string(), + "wkt": crs.to_wkt(), + }, + ] + + # 4326 is always available + if crs != DEFAULT_CRS: + crs_details.append( + { + "crs": DEFAULT_CRS.to_string(), + "wkt": DEFAULT_CRS.to_wkt(), + }, + ) + return { "id": id, "title": title, "description": description, - "keywords": [], "links": [], "extent": extents, "data_queries": { "position": { - "href": "/edr/position", + "href": "/edr/position?coords={coords}", "hreflang": "en", "rel": "data", "templated": True, @@ -205,12 +240,7 @@ def get_collection_metadata(dataset: xr.Dataset = Depends(deps.dataset)): }, "output_format": available_output_formats, "default_output_format": "cf_covjson", - "crs_details": [ - { - "crs": crs.to_string(), - "wkt": crs.to_wkt(), - }, - ], + "crs_details": crs_details, }, }, "area": { @@ -229,12 +259,7 @@ def get_collection_metadata(dataset: xr.Dataset = Depends(deps.dataset)): }, "output_format": available_output_formats, "default_output_format": "cf_covjson", - "crs_details": [ - { - "crs": crs.coordinate_system.name, - "wkt": crs.to_wkt(), - }, - ], + "crs_details": crs_details, }, }, },