diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
index 7325630f22..52b9853ac2 100644
--- a/docs/src/whatsnew/latest.rst
+++ b/docs/src/whatsnew/latest.rst
@@ -30,7 +30,13 @@ This document explains the changes made to Iris for this release
 ✨ Features
 ===========
 
-#. N/A
+#. `@trexfeathers`_ added a new :class:`~iris.Future` flag -
+   ``date_microseconds`` - which sets whether Iris should use the new
+   microsecond-precision units (see :class:`cf_units.Unit`, microseconds
+   introduced in version 3.3) when the unit
+   is a time unit. The previous maximum precision was seconds. You should check
+   your code for new floating point problems if activating this (e.g. when
+   using the :class:`~iris.Constraint` API). (:pull:`6260`)
 
 
 🐛 Bugs Fixed
@@ -50,7 +56,10 @@ This document explains the changes made to Iris for this release
 🚀 Performance Enhancements
 ===========================
 
-#. N/A
+#. Note that due to the new ``date_microseconds`` :class:`~iris.Future` flag,
+   the time coordinate categorisation speedup introduced in
+   :doc:`/whatsnew/3.11` will only be available when
+   ``iris.FUTURE.date_microseconds == True``.
 
 
 🔥 Deprecations
diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py
index d4454efe89..bc2b84709d 100644
--- a/lib/iris/__init__.py
+++ b/lib/iris/__init__.py
@@ -143,7 +143,13 @@ def callback(cube, field, filename):
 class Future(threading.local):
     """Run-time configuration controller."""
 
-    def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=False):
+    def __init__(
+        self,
+        datum_support=False,
+        pandas_ndim=False,
+        save_split_attrs=False,
+        date_microseconds=False,
+    ):
         """Container for run-time options controls.
 
         To adjust the values simply update the relevant attribute from
@@ -169,6 +175,13 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
             different ways : "global" ones are saved as dataset attributes, where
             possible, while "local" ones are saved as data-variable attributes.
             See :func:`iris.fileformats.netcdf.saver.save`.
+        date_microseconds : bool, default=False
+            Newer versions of cftime and cf-units support microsecond precision
+            for dates, compared to the legacy behaviour that only works with
+            seconds. Enabling microsecond precision will alter core Iris
+            behaviour, such as when using :class:`~iris.Constraint`, and you
+            may need to defend against floating point precision issues where
+            you didn't need to before.
 
         """
         # The flag 'example_future_flag' is provided as a reference for the
@@ -181,6 +194,7 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
         self.__dict__["datum_support"] = datum_support
         self.__dict__["pandas_ndim"] = pandas_ndim
         self.__dict__["save_split_attrs"] = save_split_attrs
+        self.__dict__["date_microseconds"] = date_microseconds
 
         # TODO: next major release: set IrisDeprecation to subclass
         # DeprecationWarning instead of UserWarning.
@@ -189,7 +203,15 @@ def __repr__(self):
         # msg = ('Future(example_future_flag={})')
         # return msg.format(self.example_future_flag)
-        msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
-        return msg.format(self.datum_support, self.pandas_ndim, self.save_split_attrs)
+        msg = (
+            "Future(datum_support={}, pandas_ndim={}, save_split_attrs={}, "
+            "date_microseconds={})"
+        )
+        return msg.format(
+            self.datum_support,
+            self.pandas_ndim,
+            self.save_split_attrs,
+            self.date_microseconds,
+        )
 
     # deprecated_options = {'example_future_flag': 'warning',}
     deprecated_options: dict[str, Literal["error", "warning"]] = {}
diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py
index 87d58944c7..e238ab9d36 100644
--- a/lib/iris/common/mixin.py
+++ b/lib/iris/common/mixin.py
@@ -7,8 +7,10 @@
 from __future__ import annotations
 
 from collections.abc import Mapping
+from datetime import timedelta
 from functools import wraps
 from typing import Any
+import warnings
 
 import cf_units
 import numpy as np
@@ -139,6 +141,85 @@ def update(self, other, **kwargs):
         dict.update(self, other, **kwargs)
 
 
+class Unit(cf_units.Unit):
+    """A :class:`cf_units.Unit` honouring ``iris.FUTURE.date_microseconds``."""
+
+    # TODO: remove this subclass once FUTURE.date_microseconds is removed.
+
+    @classmethod
+    def from_unit(cls, unit: cf_units.Unit) -> Unit:
+        """Cast a :class:`cf_units.Unit` to an :class:`Unit`.
+
+        An existing :class:`Unit` is returned as-is; any other
+        :class:`cf_units.Unit` has its instance attributes copied onto a new
+        :class:`Unit`. Anything else raises a :class:`TypeError`.
+
+        """
+        if isinstance(unit, Unit):
+            result = unit
+        elif isinstance(unit, cf_units.Unit):
+            result = cls.__new__(cls)
+            result.__dict__.update(unit.__dict__)
+        else:
+            message = f"Expected a cf_units.Unit, got {type(unit)}"
+            raise TypeError(message)
+        return result
+
+    def num2date(
+        self,
+        time_value,
+        only_use_cftime_datetimes=True,
+        only_use_python_datetimes=False,
+    ):
+        """Convert time value(s) to dates, as :meth:`cf_units.Unit.num2date`.
+
+        While ``iris.FUTURE.date_microseconds`` is ``False``, a
+        :class:`FutureWarning` is issued and the result is rounded to the
+        nearest second, which was the legacy behaviour. Users will need to
+        adapt to microsecond precision eventually, which may involve floating
+        point issues.
+
+        """
+        # Local import - presumably avoids a circular import with the iris package.
+        from iris import FUTURE
+
+        def _round(date):
+            # Round one date to the nearest second; half a second rounds up.
+            if date.microsecond == 0:
+                return date
+            elif date.microsecond < 500000:
+                return date - timedelta(microseconds=date.microsecond)
+            else:
+                return (
+                    date
+                    + timedelta(seconds=1)
+                    - timedelta(microseconds=date.microsecond)
+                )
+
+        result = super().num2date(
+            time_value, only_use_cftime_datetimes, only_use_python_datetimes
+        )
+        if FUTURE.date_microseconds is False:
+            message = (
+                "You are using legacy date precision for Iris units - max "
+                "precision is seconds. In future, Iris will use microsecond "
+                "precision - available since cf-units version 3.3 - which may "
+                "affect core behaviour. To opt-in to the "
+                "new behaviour, set `iris.FUTURE.date_microseconds = True`."
+            )
+            warnings.warn(message, category=FutureWarning)
+
+            if hasattr(result, "shape"):
+                # otypes is given explicitly so that size-0 arrays are accepted
+                # (np.vectorize cannot infer the output type without a sample).
+                vfunc = np.vectorize(_round, otypes=[object])
+                result = vfunc(result)
+            else:
+                result = _round(result)
+
+        return result
+
+
 class CFVariableMixin:
     _metadata_manager: Any
 
@@ -207,7 +288,8 @@ def units(self) -> cf_units.Unit:
 
     @units.setter
     def units(self, unit: cf_units.Unit | str | None) -> None:
-        self._metadata_manager.units = cf_units.as_unit(unit)
+        unit = cf_units.as_unit(unit)
+        self._metadata_manager.units = Unit.from_unit(unit)
 
     @property
     def attributes(self) -> LimitedAttributeDict:
diff --git a/lib/iris/tests/unit/common/metadata/test_microsecond_future.py b/lib/iris/tests/unit/common/metadata/test_microsecond_future.py
new file mode 100644
index 0000000000..b86ebf06d4
--- /dev/null
+++ b/lib/iris/tests/unit/common/metadata/test_microsecond_future.py
@@ -0,0 +1,98 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the BSD license.
+# See LICENSE in the root of the repository for full licensing details.
+"""Unit tests for the opt-in FUTURE.date_microseconds behaviour."""
+
+import warnings
+
+import cf_units
+import numpy as np
+from packaging.version import Version
+import pytest
+
+from iris import FUTURE
+from iris.coords import DimCoord
+from iris.tests._shared_utils import assert_array_equal
+
+cf_units_legacy = Version(cf_units.__version__) < Version("3.3.0")
+
+
+@pytest.fixture(
+    params=[0, 1000, 500000],
+    ids=["no_microseconds", "1_millisecond", "half_second"],
+)
+def time_coord(request) -> tuple[int, DimCoord]:
+    """Return the offset in microseconds, and a time coord shifted by it.
+
+    Whole-second points are used, so the offset is the only sub-second part.
+    """
+    points = np.array([0.0, 1.0, 2.0]) + request.param / 1e6
+    coord = DimCoord(points, "time", units="seconds since 1970-01-01 00:00:00")
+    return request.param, coord
+
+
+@pytest.fixture(
+    params=[False, True],
+    ids=["without_future", "with_future"],
+)
+def future_date_microseconds(request):
+    """Run the test with the flag set, restoring its prior value afterwards."""
+    with FUTURE.context(date_microseconds=request.param):
+        yield request.param
+
+
+def test_warning(time_coord, future_date_microseconds):
+    # Warning should be raised whether the coordinate has microseconds or not.
+    # Want users to be aware, and opt-in, as early as possible.
+    _, coord = time_coord
+
+    def _op():
+        _ = coord.units.num2date(coord.points)
+
+    if future_date_microseconds:
+        with warnings.catch_warnings():
+            warnings.simplefilter("error", FutureWarning)
+            _op()
+    else:
+        with pytest.warns(FutureWarning):
+            _op()
+
+
+@pytest.mark.parametrize(
+    "indexing",
+    (np.s_[0], np.s_[:], np.s_[:, np.newaxis]),
+    ids=("single", "array", "array_2d"),
+)
+def test_num2date(time_coord, future_date_microseconds, indexing):
+    n_microseconds, coord = time_coord
+    result = coord.units.num2date(coord.points[indexing])
+
+    if indexing == np.s_[0]:
+        assert hasattr(result, "microsecond")
+        # Convert to iterable for more consistency downstream.
+        result = [result]
+    else:
+        assert hasattr(result, "shape")
+        assert hasattr(result.flatten()[0], "microsecond")
+        result = result.flatten()
+
+    expected_microseconds = n_microseconds
+    if not future_date_microseconds or cf_units_legacy:
+        expected_microseconds = 0
+
+    result_microseconds = np.array([r.microsecond for r in result])
+    assert_array_equal(result_microseconds, expected_microseconds)
+
+
+def test_roundup(time_coord, future_date_microseconds):
+    n_microseconds, coord = time_coord
+    result = coord.units.num2date(coord.points)
+
+    expected_seconds = np.floor(coord.points)
+    if n_microseconds >= 500000 and (not future_date_microseconds or cf_units_legacy):
+        # Legacy cf-units versions round microseconds and ignore the future flag.
+        expected_seconds += 1
+
+    result_seconds = np.array([r.second for r in result])
+    assert_array_equal(result_seconds, expected_seconds)