Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Splitattrs ncsave redo #5410

Merged
Merged
Show file tree
Hide file tree
Changes from 37 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
83eee13
Add docs and future switch, no function yet.
pp-mo Jul 21, 2023
ce5cdef
Typing enables code completion for Cube.attributes.
pp-mo Jul 30, 2023
4d660b6
Make roundtrip checking more precise + improve some tests accordingly…
pp-mo Jul 30, 2023
9a80cd2
Rework all tests to use common setup + results-checking code.
pp-mo Jul 30, 2023
4ecef96
Saver supports split-attributes saving (no tests yet).
pp-mo Jul 21, 2023
78aaebb
Tiny docs fix.
pp-mo Aug 2, 2023
54f344c
Explain test routines better.
pp-mo Aug 3, 2023
edc9899
Fix init of FUTURE object.
pp-mo Aug 3, 2023
d242506
Remove spurious re-test of FUTURE.save_split_attrs.
pp-mo Aug 3, 2023
8d7ad2a
Don't create Cube attrs of 'None' (n.b. but no effect as currently us…
pp-mo Aug 4, 2023
8aa5311
Remove/repair refs to obsolete routines.
pp-mo Aug 4, 2023
f87550b
Check all warnings from save operations.
pp-mo Aug 4, 2023
30d62c2
Remove TestSave test numbers.
pp-mo Aug 4, 2023
fb13770
More save cases: no match with missing, and different cube attribute …
pp-mo Aug 4, 2023
5a429d0
Run save/roundtrip tests both with+without split saves.
pp-mo Aug 5, 2023
dd53275
Fix.
pp-mo Aug 6, 2023
80a4039
Review changes.
pp-mo Aug 7, 2023
fb343ae
Fix changed warning messages.
pp-mo Aug 8, 2023
b1778c6
Move warnings checking from 'run' to 'check' phase.
pp-mo Aug 8, 2023
067f07d
Simplify and improve warnings checking code.
pp-mo Aug 8, 2023
891da48
Fix wrong testcase.
pp-mo Aug 8, 2023
8ecadca
Minor review changes.
pp-mo Aug 8, 2023
4edf778
Fix reverted code.
pp-mo Aug 8, 2023
159914c
Use sets to simplify demoted-attributes code.
pp-mo Aug 8, 2023
deb1db3
WIP
pp-mo Aug 18, 2023
e5d5ff9
Working with iris 3.6.1, no errors TestSave or TestRoundtrip.
pp-mo Aug 18, 2023
42fce92
Interim save (incomplete?).
pp-mo Aug 21, 2023
19a2956
Different results form for split tests; working for roundtrip.
pp-mo Aug 21, 2023
59d05dc
Check that all param lists are sorted.
pp-mo Aug 21, 2023
1770d97
Check matrix result-files compatibility; add test_save_matrix.
pp-mo Aug 21, 2023
b67f510
test_load_matrix added; two types of load result.
pp-mo Aug 21, 2023
2576826
Finalise special-case attributes.
pp-mo Aug 21, 2023
e201a8e
Small docs tweaks.
pp-mo Aug 21, 2023
4602f06
Add some more testcases,
pp-mo Aug 21, 2023
2377c89
Ensure valid sort-order for globals of possibly different types.
pp-mo Aug 21, 2023
8987a26
Initialise matrix results with legacy values from v3.6.1 -- all match…
pp-mo Aug 21, 2023
cbd7167
Add full current matrix results, i.e. snapshot current behaviours.
pp-mo Aug 21, 2023
eea99d1
Review changes : rename some matrix testcases, for clarity.
pp-mo Oct 10, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,9 @@ def callback(cube, field, filename):
class Future(threading.local):
"""Run-time configuration controller."""

def __init__(self, datum_support=False, pandas_ndim=False):
def __init__(
self, datum_support=False, pandas_ndim=False, save_split_attrs=False
):
"""
A container for run-time options controls.

Expand All @@ -164,6 +166,11 @@ def __init__(self, datum_support=False, pandas_ndim=False):
pandas_ndim : bool, default=False
See :func:`iris.pandas.as_data_frame` for details - opts in to the
newer n-dimensional behaviour.
save_split_attrs : bool, default=False
Save "global" and "local" cube attributes to netcdf in appropriately
different ways : "global" ones are saved as dataset attributes, where
possible, while "local" ones are saved as data-variable attributes.
See :func:`iris.fileformats.netcdf.saver.save`.

"""
# The flag 'example_future_flag' is provided as a reference for the
Expand All @@ -175,12 +182,15 @@ def __init__(self, datum_support=False, pandas_ndim=False):
# self.__dict__['example_future_flag'] = example_future_flag
self.__dict__["datum_support"] = datum_support
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs

def __repr__(self):
# msg = ('Future(example_future_flag={})')
# return msg.format(self.example_future_flag)
msg = "Future(datum_support={}, pandas_ndim={})"
return msg.format(self.datum_support, self.pandas_ndim)
msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
return msg.format(
self.datum_support, self.pandas_ndim, self.save_split_attrs
)

# deprecated_options = {'example_future_flag': 'warning',}
deprecated_options = {}
Expand Down
16 changes: 10 additions & 6 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,19 +930,19 @@ def _normalise_attrs(
return attributes

@property
def locals(self):
def locals(self) -> LimitedAttributeDict:
return self._locals

@locals.setter
def locals(self, attributes):
def locals(self, attributes: Optional[Mapping]):
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
self._locals = self._normalise_attrs(attributes)

@property
def globals(self):
def globals(self) -> LimitedAttributeDict:
return self._globals

@globals.setter
def globals(self, attributes):
def globals(self, attributes: Optional[Mapping]):
self._globals = self._normalise_attrs(attributes)

#
Expand Down Expand Up @@ -1335,8 +1335,12 @@ def _names(self):
#
# Ensure that .attributes is always a :class:`CubeAttrsDict`.
#
@CFVariableMixin.attributes.setter
def attributes(self, attributes):
@property
def attributes(self) -> CubeAttrsDict:
return super().attributes

@attributes.setter
def attributes(self, attributes: Optional[Mapping]):
"""
An override to CfVariableMixin.attributes.setter, which ensures that Cube
attributes are stored in a way which distinguishes global + local ones.
Expand Down
210 changes: 166 additions & 44 deletions lib/iris/fileformats/netcdf/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,10 @@ def write(
matching keys will become attributes on the data variable rather
than global attributes.

.. Note::

Has no effect if :attr:`iris.FUTURE.save_split_attrs` is ``True``.

* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
List of coordinate names (or coordinate objects)
Expand Down Expand Up @@ -633,6 +637,9 @@ def write(
3 files that do not use HDF5.

"""
# TODO: when iris.FUTURE.save_split_attrs defaults to True, we can deprecate the
# "local_keys" arg, and then remove it when we finally remove the
# save_split_attrs switch.
if unlimited_dimensions is None:
unlimited_dimensions = []

Expand Down Expand Up @@ -709,20 +716,23 @@ def write(
# aux factory in the cube.
self._add_aux_factories(cube, cf_var_cube, cube_dimensions)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
global_attributes = {
k: v
for k, v in cube.attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)
if not iris.FUTURE.save_split_attrs:
# In the "old" way, we update global attributes as we go.
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
cube_attributes = cube.attributes
global_attributes = {
k: v
for k, v in cube_attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved

if cf_profile_available:
cf_patch = iris.site_configuration.get("cf_patch")
Expand Down Expand Up @@ -778,6 +788,9 @@ def update_global_attributes(self, attributes=None, **kwargs):
CF global attributes to be updated.

"""
# TODO: when iris.FUTURE.save_split_attrs is removed, this routine will
# only be called once: it can reasonably be renamed "_set_global_attributes",
# and the 'kwargs' argument can be removed.
if attributes is not None:
# Handle sequence e.g. [('fruit', 'apple'), ...].
if not hasattr(attributes, "keys"):
Expand Down Expand Up @@ -2195,6 +2208,8 @@ def _create_cf_data_variable(
"""
Create CF-netCDF data variable for the cube and any associated grid
mapping.
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.

Args:

Expand All @@ -2219,6 +2234,8 @@ def _create_cf_data_variable(
The newly created CF-netCDF data variable.

"""
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
# Get the values in a form which is valid for the file format.
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)

Expand Down Expand Up @@ -2307,16 +2324,20 @@ def set_packing_ncattrs(cfvar):
if cube.units.calendar:
_setncattr(cf_var, "calendar", cube.units.calendar)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
if iris.FUTURE.save_split_attrs:
attr_names = cube.attributes.locals.keys()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)
for attr_name in sorted(attr_names):
# Do not output 'conventions' attribute.
if attr_name.lower() == "conventions":
Expand Down Expand Up @@ -2600,9 +2621,15 @@ def save(
Save cube(s) to a netCDF file, given the cube and the filename.

* Iris will write CF 1.7 compliant NetCDF files.
* The attributes dictionaries on each cube in the saved cube list
will be compared and common attributes saved as NetCDF global
attributes where appropriate.
* **If split-attribute saving is disabled**, i.e.
:data:`iris.FUTURE`\\ ``.save_split_attrs`` is ``False``, then attributes
dictionaries on each cube in the saved cube list will be compared, and common
attributes saved as NetCDF global attributes where appropriate.

Or, **when split-attribute saving is enabled**, then ``cube.attributes.locals``
are always saved as attributes of data-variables, and ``cube.attributes.globals``
are saved as global (dataset) attributes, where possible.
This is possible because the two types are now distinguished : see :class:`~iris.cube.CubeAttrsDict`.
* Keyword arguments specifying how to save the data are applied
to each cube. To use different settings for different cubes, use
the NetCDF Context manager (:class:`~Saver`) directly.
Expand Down Expand Up @@ -2635,6 +2662,8 @@ def save(
An iterable of cube attribute keys. Any cube attributes with
matching keys will become attributes on the data variable rather
than global attributes.
**NOTE:** this is *ignored* if 'split-attribute saving' is **enabled**,
i.e. when ``iris.FUTURE.save_split_attrs`` is ``True``.

* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
Expand Down Expand Up @@ -2773,26 +2802,114 @@ def save(
else:
cubes = cube

if local_keys is None:
# Decide which cube attributes will be saved as "global" attributes
# NOTE: in 'legacy' mode, when iris.FUTURE.save_split_attrs == False, this code
# section derives a common value for 'local_keys', which is passed to 'Saver.write'
# when saving each input cube. The global attributes are then created by a call
# to "Saver.update_global_attributes" within each 'Saver.write' call (which is
# obviously a bit redundant!), plus an extra one to add 'Conventions'.
# HOWEVER, in `split_attrs` mode (iris.FUTURE.save_split_attrs == True), this code
# instead constructs a 'global_attributes' dictionary, and outputs that just once,
# after writing all the input cubes.
if iris.FUTURE.save_split_attrs:
# We don't actually use 'local_keys' in this case.
# TODO: can remove this when the iris.FUTURE.save_split_attrs is removed.
local_keys = set()

# Find any collisions in the cube global attributes and "demote" all those to
# local attributes (where possible, else warn they are lost).
# N.B. "collision" includes when not all cubes *have* that attribute.
global_names = set()
for cube in cubes:
global_names |= set(cube.attributes.globals.keys())

# Find any global attributes which are not the same on *all* cubes.
def attr_values_equal(val1, val2):
# An equality test which also works when some values are numpy arrays (!)
# As done in :meth:`iris.common.mixin.LimitedAttributeDict.__eq__`.
match = val1 == val2
try:
match = bool(match)
except ValueError:
match = match.all()
return match

cube0 = cubes[0]
invalid_globals = set(
[
attrname
for attrname in global_names
if not all(
attr_values_equal(
cube.attributes.globals.get(attrname),
cube0.attributes.globals.get(attrname),
)
for cube in cubes[1:]
)
]
)

# Establish all the global attributes which we will write to the file (at end).
global_attributes = {
attr: cube0.attributes.globals.get(attr)
for attr in global_names - invalid_globals
}
if invalid_globals:
# Some cubes have different global attributes: modify cubes as required.
warnings.warn(
f"Saving the cube global attributes {sorted(invalid_globals)} as local "
"(i.e. data-variable) attributes, where possible, since they are not "
"the same on all input cubes."
)
cubes = cubes.copy() # avoiding modifying the actual input arg.
for i_cube in range(len(cubes)):
# We iterate over cube *index*, so we can replace the list entries with
# cube *copies* -- just to avoid changing our call args.
cube = cubes[i_cube]
demote_attrs = set(cube.attributes.globals) & invalid_globals
if any(demote_attrs):
# Catch any demoted attrs where there is already a local version
blocked_attrs = demote_attrs & set(cube.attributes.locals)
if blocked_attrs:
warnings.warn(
f"Global cube attributes {sorted(blocked_attrs)} "
f'of cube "{cube.name()}" were not saved, overlaid '
"by existing local attributes with the same names."
)
demote_attrs -= blocked_attrs
if demote_attrs:
# This cube contains some 'demoted' global attributes.
# Replace input cube with a copy, so we can modify attributes.
cube = cube.copy()
cubes[i_cube] = cube
for attr in demote_attrs:
# move global to local
value = cube.attributes.globals.pop(attr)
cube.attributes.locals[attr] = value

else:
trexfeathers marked this conversation as resolved.
Show resolved Hide resolved
local_keys = set(local_keys)

# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
attributes = cubes[0].attributes
common_keys = set(attributes)
for cube in cubes[1:]:
keys = set(cube.attributes)
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(attributes[key] != cube.attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)
# Legacy mode: calculate "local_keys" to control which attributes are local
# and which global.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)

# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
attributes = cubes[0].attributes
common_keys = set(attributes)
for cube in cubes[1:]:
keys = set(cube.attributes)
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(attributes[key] != cube.attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)

def is_valid_packspec(p):
"""Only checks that the datatype is valid."""
Expand Down Expand Up @@ -2894,7 +3011,12 @@ def is_valid_packspec(p):
warnings.warn(msg)

# Add conventions attribute.
sman.update_global_attributes(Conventions=conventions)
if iris.FUTURE.save_split_attrs:
# In the "new way", we just create all the global attributes at once.
global_attributes["Conventions"] = conventions
sman.update_global_attributes(global_attributes)
else:
sman.update_global_attributes(Conventions=conventions)

if compute:
# No more to do, since we used Saver(compute=True).
Expand Down
Loading