Skip to content

Commit

Permalink
Merge pull request #711 from maxrjones/fill-value
Browse files Browse the repository at this point in the history
Retain original _FillValue in encoding
  • Loading branch information
cisaacstern authored Mar 18, 2024
2 parents eb519e1 + dc87b89 commit ae64a98
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 4 deletions.
13 changes: 10 additions & 3 deletions pangeo_forge_recipes/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def dataset_to_schema(ds: xr.Dataset) -> XarraySchema:

# Remove redundant encoding options
for v in ds.variables:
for option in ["_FillValue", "source"]:
# TODO: should be okay to remove _FillValue?
for option in ["source"]:
if option in ds[v].encoding:
del ds[v].encoding[option]
d = ds.to_dict(data=False, encoding=True)
Expand Down Expand Up @@ -147,7 +146,15 @@ def _combine_attrs(a1: dict, a2: dict) -> dict:
common_attrs = set(a1) & set(a2)
new_attrs = {}
for key in common_attrs:
if a1[key] == a2[key]:
# treat NaNs as equal in the attrs
if (
isinstance(a1[key], np.floating)
and isinstance(a2[key], np.floating)
and np.isnan(a1[key])
and np.isnan(a2[key])
):
new_attrs[key] = a1[key]
elif a1[key] == a2[key]:
new_attrs[key] = a1[key]
return new_attrs

Expand Down
23 changes: 22 additions & 1 deletion tests/test_combiners.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging

import apache_beam as beam
import numpy as np
import pytest
import xarray as xr
from apache_beam.options.pipeline_options import PipelineOptions
Expand Down Expand Up @@ -102,11 +103,31 @@ def _strip_keys(item):
return item[1]


def _assert_schema_equal(a, b):
# This is used instead of ``assert dict1 == dict2`` so that NaNs are treated as equal.
assert set(a.keys()) == set(b.keys())

for key, value1 in a.items():
value2 = b[key]
if (
isinstance(value1, np.floating)
and isinstance(value2, np.floating)
and np.isnan(value1)
and np.isnan(value2)
):
continue

if isinstance(value1, dict) and isinstance(value2, dict):
_assert_schema_equal(value1, value2)
else:
assert value1 == value2


def has_correct_schema(expected_schema):
    """Build a checker asserting that a result holds exactly ``expected_schema``.

    Parameters
    ----------
    expected_schema : dict
        The schema the single result element is expected to match.

    Returns
    -------
    callable
        ``_check_results(actual)``, which raises ``AssertionError`` unless
        ``actual`` has exactly one element and that element matches
        ``expected_schema``.
    """

    def _check_results(actual):
        assert len(actual) == 1
        schema = actual[0]
        # Compare with the NaN-tolerant helper only. A plain
        # ``schema == expected_schema`` would fail whenever the schema holds
        # a NaN value, because NaN never compares equal to itself.
        _assert_schema_equal(schema, expected_schema)

    return _check_results

Expand Down

0 comments on commit ae64a98

Please sign in to comment.