Encoding DataArrays
Make sure that DataArrays are encoded as well, to hopefully solve IndexError issues when datetimes are not coordinates.

Works on #59
abkfenris committed May 23, 2024
1 parent 47a6e58 commit 2633673
Showing 8 changed files with 44 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -10,7 +10,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
os: [windows-latest, ubuntu-latest, macos-latest]
pydantic-version: ["<2", ">=2"]

2 changes: 2 additions & 0 deletions noxfile.py
@@ -9,6 +9,8 @@
python_versions = workflow["jobs"]["run"]["strategy"]["matrix"]["python-version"]
pydantic_versions = workflow["jobs"]["run"]["strategy"]["matrix"]["pydantic-version"]

nox.options.default_venv_backend = "uv"


@nox.session(python=python_versions)
@nox.parametrize("pydantic", pydantic_versions)
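The body of the parametrized session is collapsed above. As a rough, hypothetical sketch only (the session name, install steps, and test command below are assumptions, not this repository's actual noxfile), a session wired to the CI matrix typically pins pydantic before running the tests:

import nox

@nox.session(python=["3.9", "3.10", "3.11", "3.12"])
@nox.parametrize("pydantic", ["<2", ">=2"])
def tests(session, pydantic):
    # Install the pydantic constraint from the matrix, e.g. "pydantic<2" or "pydantic>=2".
    session.install(f"pydantic{pydantic}")
    session.install("-r", "requirements-dev.txt")
    session.install(".")
    session.run("pytest")

With nox.options.default_venv_backend = "uv" set as in the change above, each such session builds its virtual environment with uv instead of virtualenv.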
1 change: 1 addition & 0 deletions requirements-dev.txt
@@ -1,4 +1,5 @@
black
cf_xarray
check-manifest
doctr
h5netcdf
2 changes: 1 addition & 1 deletion tests/conftest.py
@@ -8,7 +8,7 @@
server_path = Path(__file__).parent / "server.py"


- @pytest.fixture
+ @pytest.fixture(scope="session")
def xpublish_server(xprocess):
"""Launch an Xpublish server in the background.
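The remainder of the fixture is collapsed above. For orientation, a minimal sketch of what a session-scoped pytest-xprocess fixture around server.py commonly looks like (the readiness pattern, host/port, and process name here are assumptions, not the repository's actual fixture):

from pathlib import Path

import pytest
from xprocess import ProcessStarter

server_path = Path(__file__).parent / "server.py"


@pytest.fixture(scope="session")
def xpublish_server(xprocess):
    """Launch an Xpublish server once per test session and stop it afterwards."""

    class Starter(ProcessStarter):
        # Assumed readiness pattern and launch command.
        pattern = "Uvicorn running"
        args = ["python", str(server_path)]

    xprocess.ensure("xpublish_server", Starter)
    yield "http://127.0.0.1:9000"  # assumed host/port
    xprocess.getinfo("xpublish_server").terminate()

Changing the scope to "session" means the server is started once for the whole test run instead of once per test, which keeps the OpenDAP tests fast.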
Binary file added tests/min_coordinates_encoding.nc
13 changes: 12 additions & 1 deletion tests/server.py
@@ -1,5 +1,7 @@
"""Test OpenDAP server with air temperature dataset."""

from pathlib import Path

import numpy as np
import xarray.tutorial
import xpublish
@@ -14,8 +16,17 @@
ds_attrs_cast.attrs["npint"] = np.int16(16)
ds_attrs_cast.attrs["npintthirtytwo"] = np.int32(32)

ds_coordinates_encoding = xarray.open_dataset(
Path(__file__).parent / "min_coordinates_encoding.nc",
)

rest = xpublish.Rest(
{"air": ds, "attrs_quote": ds_attrs_quote, "attrs_cast": ds_attrs_cast},
{
"air": ds,
"attrs_quote": ds_attrs_quote,
"attrs_cast": ds_attrs_cast,
"coords_encoding": ds_coordinates_encoding,
},
plugins={"opendap": OpenDapPlugin()},
)

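min_coordinates_encoding.nc itself is binary and not shown in this commit. Purely as an illustration of the kind of data being targeted (the real file's contents are not visible here, and the ocean_time name is borrowed from the CI failure messages further down), a dataset whose datetime64 variable is a data variable rather than a coordinate could be written like this:

import numpy as np
import xarray as xr

times = np.array(["2024-05-01T00:00", "2024-05-01T06:00"], dtype="datetime64[ns]")
ds = xr.Dataset(
    {
        # Datetime data variable that is deliberately NOT a coordinate.
        "ocean_time": ("obs", times),
        # Hypothetical companion variable sharing the same dimension.
        "zeta": ("obs", np.array([0.1, 0.2])),
    }
)
ds.to_netcdf("min_coordinates_encoding.nc")

The change to dap_grid further down runs such variables through CF encoding before building the DAP response.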
24 changes: 23 additions & 1 deletion tests/test_server.py
@@ -7,6 +7,7 @@
"""

import sys
from pathlib import Path

import netCDF4
import pytest
@@ -31,7 +32,9 @@ def test_default_xarray_engine(xpublish_server, dataset):
"""Test opening OpenDAP air dataset with default Xarray engine."""
url = f"{xpublish_server}/datasets/air/opendap"
ds = xr.open_dataset(url)
- assert ds == dataset
+ # assert ds == dataset
+ xr.testing.assert_equal(ds, dataset)

[GitHub Actions check failures annotated on line 36 of tests/test_server.py — test_default_xarray_engine]

run (3.9, ubuntu-latest, pydantic <2 and >=2): IndexError: The indexing operation you are attempting to perform is not valid on netCDF4.Variable object. Try loading your data into memory first by calling .load().

run (3.10 / 3.11 / 3.12, ubuntu-latest, pydantic <2 and >=2): AssertionError: Left and right Dataset objects are not equal Differing data variables: L air (time, lat, lon) int16 8MB 24120 24250 24350 ... 29649 29619 29569 R air (time, lat, lon) float64 31MB 241.2 242.5 243.5 ... 296.2 295.7
# xr.testing.assert_identical(ds, dataset)


@pytest.mark.skipif(
@@ -76,3 +79,22 @@ def test_attrs_types(xpublish_server):

assert ds.attrs["npint"] == 16
assert ds.attrs["npintthirtytwo"] == 32


@pytest.mark.skipif(
sys.platform == "win32",
reason="NetCDF4 is failing on Windows Github Actions workers",
)
def test_coordinates_persist_correctly(xpublish_server):
"""Test that encoded coordinate data makes it through OpenDAP correctly.
xref: https://github.com/xpublish-community/xpublish/discussions/246
"""
dataset = xr.open_dataset(Path(__file__).parent / "min_coordinates_encoding.nc")

url = f"{xpublish_server}/datasets/coords_encoding/opendap"
ds = xr.open_dataset(url)

[GitHub Actions check failures annotated on line 96 of tests/test_server.py — test_coordinates_persist_correctly]

run (3.9 / 3.10 / 3.11 / 3.12, ubuntu-latest, pydantic <2 and >=2): IndexError: Failed to decode variable 'ocean_time': The indexing operation you are attempting to perform is not valid on netCDF4.Variable object. Try loading your data into memory first by calling .load().
assert ds.dims == dataset.dims
assert ds.coords == dataset.coords
assert ds.variables == dataset.variables
assert ds == dataset
6 changes: 4 additions & 2 deletions xpublish_opendap/dap_xarray.py
@@ -90,10 +90,12 @@ def dap_dimension(da: xr.DataArray) -> dap.Array:

def dap_grid(da: xr.DataArray, dims: dict[str, dap.Array]) -> dap.Grid:
"""Transform an xarray DataArray into a DAP Grid."""
encoded_da = xr.conventions.encode_cf_variable(da.variable)

data_grid = dap.Grid(
name=da.name,
- data=da.astype(da.dtype).data,
- dtype=dap_dtype(da),
+ data=encoded_da.astype(encoded_da.dtype).data,
+ dtype=dap_dtype(encoded_da),
dimensions=[dims[dim] for dim in da.dims],
)

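For context on the change above, here is a small standalone sketch (not part of the plugin) of what xr.conventions.encode_cf_variable does to a datetime64 variable: the values become numeric offsets with CF units/calendar attributes, a representation the DAP type mapping can handle, whereas raw datetime64 values generally cannot be passed through directly. The exact units and dtype in the comments are examples and depend on the xarray version.

import numpy as np
import xarray as xr

var = xr.DataArray(
    np.array(["2024-05-23T00:00", "2024-05-23T06:00"], dtype="datetime64[ns]"),
    dims="time",
).variable

encoded = xr.conventions.encode_cf_variable(var)
print(encoded.dtype)  # numeric (e.g. int64) instead of datetime64[ns]
print(encoded.attrs)  # e.g. {'units': 'hours since 2024-05-23', 'calendar': 'proleptic_gregorian'}

Note that da.dims is still used for the dimension lookup in dap_grid, so only the data and dtype come from the encoded variable.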
