Splitattrs ncsave #5404

Closed
wants to merge 5 commits into from
16 changes: 13 additions & 3 deletions lib/iris/__init__.py
@@ -142,7 +142,9 @@ def callback(cube, field, filename):
class Future(threading.local):
"""Run-time configuration controller."""

def __init__(self, datum_support=False, pandas_ndim=False):
def __init__(
self, datum_support=False, pandas_ndim=False, save_split_attrs=False
):
"""
A container for run-time options controls.

@@ -164,6 +166,11 @@ def __init__(self, datum_support=False, pandas_ndim=False):
pandas_ndim : bool, default=False
See :func:`iris.pandas.as_data_frame` for details - opts in to the
newer n-dimensional behaviour.
save_split_attrs : bool, default=False
Save "global" and "local" cube attributes to netcdf in appropriately
different ways: "global" ones are saved as dataset attributes, where
possible, while "local" ones are saved as data-variable attributes.
See :func:`iris.fileformats.netcdf.saver.save`.

"""
# The flag 'example_future_flag' is provided as a reference for the
@@ -175,12 +182,15 @@ def __init__(self, datum_support=False, pandas_ndim=False):
# self.__dict__['example_future_flag'] = example_future_flag
self.__dict__["datum_support"] = datum_support
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs

def __repr__(self):
# msg = ('Future(example_future_flag={})')
# return msg.format(self.example_future_flag)
msg = "Future(datum_support={}, pandas_ndim={})"
return msg.format(self.datum_support, self.pandas_ndim)
msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
return msg.format(
self.datum_support, self.pandas_ndim, self.save_split_attrs
)

# deprecated_options = {'example_future_flag': 'warning',}
deprecated_options = {}
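For orientation, here is a minimal usage sketch of the new flag (illustrative only, not part of the diff; the printed form follows the ``__repr__`` shown above):

import iris

# Opt in to the new split-attribute saving behaviour (defaults to False).
iris.FUTURE.save_split_attrs = True

# Subsequent iris.save(...) calls will then write cube.attributes.globals as
# dataset attributes and cube.attributes.locals as data-variable attributes.
print(iris.FUTURE)
# e.g. Future(datum_support=False, pandas_ndim=False, save_split_attrs=True)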
8 changes: 4 additions & 4 deletions lib/iris/cube.py
@@ -930,19 +930,19 @@ def _normalise_attrs(
return attributes

@property
def locals(self):
def locals(self) -> LimitedAttributeDict:
return self._locals

@locals.setter
def locals(self, attributes):
def locals(self, attributes: Optional[Mapping]):
self._locals = self._normalise_attrs(attributes)

@property
def globals(self):
def globals(self) -> LimitedAttributeDict:
return self._globals

@globals.setter
def globals(self, attributes):
def globals(self, attributes: Optional[Mapping]):
self._globals = self._normalise_attrs(attributes)

#
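As a side note, the typed locals/globals split on cube.attributes can be exercised as below. This is a sketch (not from the diff) assuming the CubeAttrsDict behaviour this PR builds on; the attribute names are made up, and the printed dicts are shown only roughly:

import numpy as np
from iris.cube import Cube

cube = Cube(np.zeros((2, 3)), var_name="x")

# 'globals' hold attributes intended for the whole dataset; 'locals' hold
# attributes intended for the data variable itself.
cube.attributes.globals["history"] = "created for a demo"
cube.attributes.locals["processing_note"] = "variable-specific detail"

print(cube.attributes.globals)  # roughly: {'history': 'created for a demo'}
print(cube.attributes.locals)   # roughly: {'processing_note': 'variable-specific detail'}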
210 changes: 166 additions & 44 deletions lib/iris/fileformats/netcdf/saver.py
@@ -540,6 +540,8 @@ def write(
An iterable of cube attribute keys. Any cube attributes with
matching keys will become attributes on the data variable rather
than global attributes.
.. note::
Has no effect if :attr:`iris.FUTURE.save_split_attrs` is ``True``.

* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
@@ -709,20 +711,27 @@ def write(
# aux factory in the cube.
self._add_aux_factories(cube, cf_var_cube, cube_dimensions)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
global_attributes = {
k: v
for k, v in cube.attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)
if not iris.FUTURE.save_split_attrs:
# In the "old" way, we update global attributes as we go.
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add global attributes taking into account local_keys.
cube_attributes = cube.attributes
if iris.FUTURE.save_split_attrs:
# In this case, do *not* promote any 'local' attributes to global ones:
# only "global" cube attrs may be written as global file attributes.
cube_attributes = cube_attributes.globals
global_attributes = {
k: v
for k, v in cube_attributes.items()
if (k not in local_keys and k.lower() != "conventions")
}
self.update_global_attributes(global_attributes)

if cf_profile_available:
cf_patch = iris.site_configuration.get("cf_patch")
@@ -778,6 +787,9 @@ def update_global_attributes(self, attributes=None, **kwargs):
CF global attributes to be updated.

"""
# TODO: when we no longer support combined attribute saving, this routine will
# only be called once: it can reasonably be renamed "_set_global_attributes",
# and the 'kwargs' argument can be removed.
if attributes is not None:
# Handle sequence e.g. [('fruit', 'apple'), ...].
if not hasattr(attributes, "keys"):
@@ -2219,6 +2231,8 @@ def _create_cf_data_variable(
The newly created CF-netCDF data variable.

"""
# TODO: when iris.FUTURE.save_split_attrs is removed, the 'local_keys' arg can
# be removed.
# Get the values in a form which is valid for the file format.
data = self._ensure_valid_dtype(cube.core_data(), "cube", cube)

@@ -2307,16 +2321,20 @@ def set_packing_ncattrs(cfvar):
if cube.units.calendar:
_setncattr(cf_var, "calendar", cube.units.calendar)

# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
if iris.FUTURE.save_split_attrs:
attr_names = cube.attributes.locals.keys()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)
# Add data variable-only attribute names to local_keys.
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)
local_keys.update(_CF_DATA_ATTRS, _UKMO_DATA_ATTRS)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)

# Add any cube attributes whose keys are in local_keys as
# CF-netCDF data variable attributes.
attr_names = set(cube.attributes).intersection(local_keys)
for attr_name in sorted(attr_names):
# Do not output 'conventions' attribute.
if attr_name.lower() == "conventions":
@@ -2600,9 +2618,15 @@ def save(
Save cube(s) to a netCDF file, given the cube and the filename.

* Iris will write CF 1.7 compliant NetCDF files.
* The attributes dictionaries on each cube in the saved cube list
will be compared and common attributes saved as NetCDF global
attributes where appropriate.
* If **split-attribute saving is disabled**, i.e.
:attr:`iris.FUTURE.save_split_attrs` is ``False``, then the attributes dictionaries
on each cube in the saved cube list will be compared and common attributes saved
as NetCDF global attributes where appropriate.

Or, when **split-attribute saving is enabled**, ``cube.attributes.locals``
are always saved as attributes of data-variables, and ``cube.attributes.globals``
are saved as global (dataset) attributes, where possible.
The two attribute types are now distinguished: see :class:`~iris.cube.CubeAttrsDict`.
* Keyword arguments specifying how to save the data are applied
to each cube. To use different settings for different cubes, use
the NetCDF Context manager (:class:`~Saver`) directly.
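To make the split/collision behaviour described above concrete, here is a hedged sketch (not from the diff; the file name and attribute names are invented) of saving two cubes whose "global" attributes partly disagree:

import warnings
import numpy as np
import iris
from iris.cube import Cube, CubeList

iris.FUTURE.save_split_attrs = True  # opt in to split-attribute saving

a = Cube(np.arange(3.0), var_name="a")
b = Cube(np.arange(3.0), var_name="b")

# A global attribute that matches on all cubes is written once, as a
# dataset attribute ...
a.attributes.globals["source"] = "model run 42"
b.attributes.globals["source"] = "model run 42"

# ... while a conflicting one is demoted to per-variable attributes,
# with a warning (see the collision handling further down this diff).
a.attributes.globals["comment"] = "first member"
b.attributes.globals["comment"] = "second member"

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    iris.save(CubeList([a, b]), "demo_split_attrs.nc")
print([str(w.message) for w in caught])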
@@ -2635,6 +2659,8 @@ def save(
An iterable of cube attribute keys. Any cube attributes with
matching keys will become attributes on the data variable rather
than global attributes.
**NOTE:** this is *ignored* if 'split-attribute saving' is **enabled**,
i.e. when ``iris.FUTURE.save_split_attrs`` is ``True``.

* unlimited_dimensions (iterable of strings and/or
:class:`iris.coords.Coord` objects):
@@ -2773,26 +2799,117 @@ def save(
else:
cubes = cube

if local_keys is None:
if iris.FUTURE.save_split_attrs:
# We don't actually use 'local_keys' in this case.
# TODO: can remove this when iris.FUTURE.save_split_attrs is removed.
local_keys = set()

# Find any collisions in the cube global attributes and "demote" all those to
# local attributes (where possible, else warn they are lost).
# N.B. "collision" includes when not all cubes *have* that attribute.
global_names = set()
for cube in cubes:
global_names |= set(cube.attributes.globals.keys())

# Find any global attributes which are not the same on *all* cubes.
def attr_values_equal(val1, val2):
# An equality test which also works when some values are numpy arrays (!)
# As done in :meth:`iris.common.mixin.LimitedAttributeDict.__eq__`.
match = val1 == val2
try:
match = bool(match)
except ValueError:
match = match.all()
return match

cube0 = cubes[0]
invalid_globals = [
attrname
for attrname in global_names
if not all(
attr_values_equal(
cube.attributes.globals.get(attrname),
cube0.attributes.globals.get(attrname),
)
for cube in cubes[1:]
)
]

# Establish all the global attributes which we will write to the file (at end).
global_attributes = {
attr: cube0.attributes.globals[attr]
for attr in global_names
if attr not in invalid_globals
}
if invalid_globals:
# Some cubes have different global attributes: modify cubes as required.
warnings.warn(
f"Saving the cube global attributes {invalid_globals} as local "
"(i.e. data-variable) attributes, where possible, since they are not "
"the same on all input cubes."
)
cubes = list(cubes) # avoiding modifying the actual input arg.
for i_cube in range(len(cubes)):
# We iterate over cube *index*, so we can replace the list entries
# with cube *copies* -- just to avoid changing our call args.
cube = cubes[i_cube]
demote_attrs = [
attr
for attr in cube.attributes.globals
if attr in invalid_globals
]
if any(demote_attrs):
# This cube contains some 'demoted' global attributes.
# Replace the input cube with a copy, so we can modify attributes.
cube = cube.copy()
cubes[i_cube] = cube
# Catch any demoted attrs where there is already a local version
blocked_attrs = [
attrname
for attrname in demote_attrs
if attrname in cube.attributes.locals
]
if blocked_attrs:
warnings.warn(
f"Global cube attributes {blocked_attrs} "
f'of cube "{cube.name()}" have been lost, overlaid '
"by existing local attributes with the same names."
)
for attr in demote_attrs:
if attr not in blocked_attrs:
cube.attributes.locals[
attr
] = cube.attributes.globals[attr]
cube.attributes.globals.pop(attr)

else:
local_keys = set(local_keys)

# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
attributes = cubes[0].attributes
common_keys = set(attributes)
for cube in cubes[1:]:
keys = set(cube.attributes)
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(attributes[key] != cube.attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)
# Determine the attribute keys that are common across all cubes and
# thereby extend the collection of local_keys for attributes
# that should be attributes on data variables.
# NOTE: in 'legacy' mode, this code derives a common value for 'local_keys', which
# is employed in saving each cube.
# However, in 'split_attrs' mode, this considers ONLY global attributes, and the
# resulting 'common_keys' is the fixed result: each cube is then saved roughly as
# "sman.write(..., local_keys=set(cube.attributes) - common_keys, ...)".
if local_keys is None:
local_keys = set()
else:
local_keys = set(local_keys)

common_attr_values = None
for cube in cubes:
cube_attributes = cube.attributes
keys = set(cube_attributes)
if common_attr_values is None:
common_attr_values = cube_attributes.copy()
common_keys = keys.copy()
local_keys.update(keys.symmetric_difference(common_keys))
common_keys.intersection_update(keys)
different_value_keys = []
for key in common_keys:
if np.any(common_attr_values[key] != cube_attributes[key]):
different_value_keys.append(key)
common_keys.difference_update(different_value_keys)
local_keys.update(different_value_keys)
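As an aside (not part of the diff), the common-key bookkeeping above can be illustrated with plain dictionaries; the attribute names below are invented for the illustration:

import numpy as np

# Attribute dicts for three hypothetical cubes.
cube_attrs = [
    {"source": "model", "history": "run 1", "title": "demo"},
    {"source": "model", "history": "run 2", "title": "demo"},
    {"source": "model", "title": "demo"},
]

local_keys = set()
common_attr_values = None
for attributes in cube_attrs:
    keys = set(attributes)
    if common_attr_values is None:
        common_attr_values = dict(attributes)
        common_keys = keys.copy()
    # Keys not present on every cube become local ...
    local_keys.update(keys.symmetric_difference(common_keys))
    common_keys.intersection_update(keys)
    # ... as do keys whose values differ between cubes.
    different_value_keys = [
        key for key in common_keys
        if np.any(common_attr_values[key] != attributes[key])
    ]
    common_keys.difference_update(different_value_keys)
    local_keys.update(different_value_keys)

print(sorted(common_keys))  # ['source', 'title'] -> candidates for global attributes
print(sorted(local_keys))   # ['history'] -> written per data variable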

def is_valid_packspec(p):
"""Only checks that the datatype is valid."""
@@ -2894,7 +3011,12 @@ def is_valid_packspec(p):
warnings.warn(msg)

# Add conventions attribute.
sman.update_global_attributes(Conventions=conventions)
if iris.FUTURE.save_split_attrs:
# In the "new" way, we just create all the global attributes at once.
global_attributes["Conventions"] = conventions
sman.update_global_attributes(global_attributes)
else:
sman.update_global_attributes(Conventions=conventions)

if compute:
# No more to do, since we used Saver(compute=True).