Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

date_microseconds FUTURE flag #6260

Merged
merged 14 commits into from
Dec 17, 2024
Merged
13 changes: 11 additions & 2 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,13 @@ This document explains the changes made to Iris for this release
✨ Features
===========

#. N/A
#. `@trexfeathers`_ added a new :class:`~iris.Future` flag -
``date_microseconds`` - which sets whether Iris should use the new
microsecond-precision units (see :class:`cf_units.Unit`, microseconds
introduced in version 3.3) when the unit
is a time unit. The previous maximum precision was seconds. You should check
your code for new floating point problems if activating this (e.g. when
using the :class:`~iris.Constraint` API). (:pull:`6260`)


🐛 Bugs Fixed
Expand All @@ -50,7 +56,10 @@ This document explains the changes made to Iris for this release
🚀 Performance Enhancements
===========================

#. N/A
#. Note that due to the new ``date_microseconds`` :class:`~iris.Future` flag,
the time coordinate categorisation speedup introduced in
:doc:`/whatsnew/3.11` will only be available when
``iris.FUTURE.date_microseconds == True``.
stephenworsley marked this conversation as resolved.
Show resolved Hide resolved


🔥 Deprecations
Expand Down
23 changes: 21 additions & 2 deletions lib/iris/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,13 @@ def callback(cube, field, filename):
class Future(threading.local):
"""Run-time configuration controller."""

def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=False):
def __init__(
self,
datum_support=False,
pandas_ndim=False,
save_split_attrs=False,
date_microseconds=False,
):
"""Container for run-time options controls.

To adjust the values simply update the relevant attribute from
Expand All @@ -169,6 +175,13 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
different ways : "global" ones are saved as dataset attributes, where
possible, while "local" ones are saved as data-variable attributes.
See :func:`iris.fileformats.netcdf.saver.save`.
date_microseconds : bool, default=False
Newer versions of cftime and cf-units support microsecond precision
for dates, compared to the legacy behaviour that only works with
seconds. Enabling microsecond precision will alter core Iris
behaviour, such as when using :class:`~iris.Constraint`, and you
may need to defend against floating point precision issues where
you didn't need to before.

"""
# The flag 'example_future_flag' is provided as a reference for the
Expand All @@ -181,6 +194,7 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
self.__dict__["datum_support"] = datum_support
self.__dict__["pandas_ndim"] = pandas_ndim
self.__dict__["save_split_attrs"] = save_split_attrs
self.__dict__["date_microseconds"] = date_microseconds

# TODO: next major release: set IrisDeprecation to subclass
# DeprecationWarning instead of UserWarning.
Expand All @@ -189,7 +203,12 @@ def __repr__(self):
# msg = ('Future(example_future_flag={})')
# return msg.format(self.example_future_flag)
msg = "Future(datum_support={}, pandas_ndim={}, save_split_attrs={})"
return msg.format(self.datum_support, self.pandas_ndim, self.save_split_attrs)
return msg.format(
self.datum_support,
self.pandas_ndim,
self.save_split_attrs,
self.date_microseconds,
)

# deprecated_options = {'example_future_flag': 'warning',}
deprecated_options: dict[str, Literal["error", "warning"]] = {}
Expand Down
67 changes: 66 additions & 1 deletion lib/iris/common/mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
from __future__ import annotations

from collections.abc import Mapping
from datetime import timedelta
from functools import wraps
from typing import Any
import warnings

import cf_units
import numpy as np
Expand Down Expand Up @@ -139,6 +141,68 @@ def update(self, other, **kwargs):
dict.update(self, other, **kwargs)


class Unit(cf_units.Unit):
    """A :class:`cf_units.Unit` whose date conversion honours ``FUTURE.date_microseconds``.

    The only behavioural difference from the parent class is in
    :meth:`num2date`, which rounds to whole seconds (the legacy behaviour)
    unless ``iris.FUTURE.date_microseconds`` has been enabled.

    """

    # TODO: remove this subclass once FUTURE.date_microseconds is removed.

    @classmethod
    def from_unit(cls, unit: cf_units.Unit):
        """Cast a :class:`cf_units.Unit` to an :class:`Unit`.

        Parameters
        ----------
        unit : cf_units.Unit
            The unit to cast.

        Returns
        -------
        Unit
            `unit` itself when it is already a :class:`Unit`; otherwise a new
            instance that shares the contents of ``unit.__dict__``.

        Raises
        ------
        TypeError
            If `unit` is not a :class:`cf_units.Unit`.

        """
        if isinstance(unit, Unit):
            result = unit
        elif isinstance(unit, cf_units.Unit):
            # Bypass __init__ and copy the instance state across directly.
            result = cls.__new__(cls)
            result.__dict__.update(unit.__dict__)
        else:
            message = f"Expected a cf_units.Unit, got {type(unit)}"
            raise TypeError(message)
        return result

    def num2date(
        self,
        time_value,
        only_use_cftime_datetimes=True,
        only_use_python_datetimes=False,
    ):
        """Convert numeric time value(s) to date(s), honouring the FUTURE flag.

        All arguments are passed unchanged to
        :meth:`cf_units.Unit.num2date`.

        Returns
        -------
        The parent's result. When ``iris.FUTURE.date_microseconds`` is
        ``False`` every date is additionally rounded to the nearest whole
        second (half a second rounds up).

        Warns
        -----
        FutureWarning
            On every call made while ``iris.FUTURE.date_microseconds`` is
            ``False``, whether or not any rounding was needed.

        """
        # Used to patch the cf_units.Unit.num2date method to round to the
        # nearest second, which was the legacy behaviour. This is under a FUTURE
        # flag - users will need to adapt to microsecond precision eventually,
        # which may involve floating point issues.
        from iris import FUTURE

        def _round(date):
            # Round a single date object to the nearest whole second.
            if date.microsecond == 0:
                return date
            elif date.microsecond < 500000:
                return date - timedelta(microseconds=date.microsecond)
            else:
                return (
                    date
                    + timedelta(seconds=1)
                    - timedelta(microseconds=date.microsecond)
                )

        result = super().num2date(
            time_value, only_use_cftime_datetimes, only_use_python_datetimes
        )
        if FUTURE.date_microseconds is False:
            message = (
                "You are using legacy date precision for Iris units - max "
                "precision is seconds. In future, Iris will use microsecond "
                "precision - available since cf-units version 3.3 - which may "
                "affect core behaviour. To opt-in to the "
                "new behaviour, set `iris.FUTURE.date_microseconds = True`."
            )
            # stacklevel=2 attributes the warning to the calling code rather
            # than to this wrapper.
            warnings.warn(message, category=FutureWarning, stacklevel=2)

            if hasattr(result, "shape"):
                # otypes must be given explicitly: without it np.vectorize
                # infers the output type by calling the function on the first
                # element, and so raises ValueError for size-0 arrays.
                vfunc = np.vectorize(_round, otypes=[object])
                result = vfunc(result)
            else:
                result = _round(result)

        return result


class CFVariableMixin:
_metadata_manager: Any

Expand Down Expand Up @@ -207,7 +271,8 @@ def units(self) -> cf_units.Unit:

@units.setter
def units(self, unit: cf_units.Unit | str | None) -> None:
self._metadata_manager.units = cf_units.as_unit(unit)
unit = cf_units.as_unit(unit)
self._metadata_manager.units = Unit.from_unit(unit)

@property
def attributes(self) -> LimitedAttributeDict:
Expand Down
98 changes: 98 additions & 0 deletions lib/iris/tests/unit/common/metadata/test_microsecond_future.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for the opt-in FUTURE.date_microseconds behaviour."""

import warnings

import cf_units
import numpy as np
from packaging.version import Version
import pytest

from iris import FUTURE
from iris.coords import DimCoord
from iris.tests._shared_utils import assert_array_equal

# True when the installed cf-units predates 3.3.0. The tests below expect such
# versions to yield whole-second dates regardless of FUTURE.date_microseconds.
cf_units_legacy = Version(cf_units.__version__) < Version("3.3.0")


@pytest.fixture(
    params=[0, 1000, 500000],
    ids=["no_microseconds", "1_millisecond", "half_second"],
)
def time_coord(request) -> tuple[int, DimCoord]:
    """Provide a time coordinate whose points are offset by N microseconds.

    Returns
    -------
    tuple of (int, DimCoord)
        The microsecond offset (the fixture parameter) and a 3-point "time"
        coordinate in seconds since the epoch, with every point shifted by
        that offset.

    """
    points = np.array([0.0, 1.0, 2.0])
    # The parameter is in microseconds; the coordinate's unit is seconds.
    points += request.param / 1e6
    return request.param, DimCoord(
        points,
        "time",
        units="seconds since 1970-01-01 00:00:00",
    )


@pytest.fixture(
    params=[False, True],
    ids=["without_future", "with_future"],
)
def future_date_microseconds(request):
    """Run the test with ``FUTURE.date_microseconds`` off, then on.

    Yields the flag value in force for the test. Afterwards the flag is put
    back to whatever it was before the fixture ran, so a session that enabled
    it globally is not silently reset to ``False``.

    """
    original = FUTURE.date_microseconds
    FUTURE.date_microseconds = request.param
    yield request.param
    FUTURE.date_microseconds = original


def test_warning(time_coord, future_date_microseconds):
    """Check the FutureWarning is raised only when the flag is not set."""
    # The warning is expected whether or not the points carry microseconds:
    # users should become aware, and be able to opt in, as early as possible.
    _, coord = time_coord

    if not future_date_microseconds:
        with pytest.warns(FutureWarning):
            coord.units.num2date(coord.points)
        return

    with warnings.catch_warnings():
        # Escalate to an error so that any FutureWarning fails the test.
        warnings.simplefilter("error", FutureWarning)
        coord.units.num2date(coord.points)


@pytest.mark.parametrize(
    "indexing",
    (np.s_[0], np.s_[:], np.s_[:, np.newaxis]),
    ids=("single", "array", "array_2d"),
)
def test_num2date(time_coord, future_date_microseconds, indexing):
    """Check the microseconds of converted dates for scalar, 1D and 2D input."""
    n_microseconds, coord = time_coord
    converted = coord.units.num2date(coord.points[indexing])

    if indexing == np.s_[0]:
        assert hasattr(converted, "microsecond")
        # Wrap the single date so both branches leave an iterable of dates.
        dates = [converted]
    else:
        assert hasattr(converted, "shape")
        assert hasattr(converted.flatten()[0], "microsecond")
        dates = converted.flatten()

    # Microseconds are only expected to survive when the flag is on AND the
    # installed cf-units is not a legacy version.
    keeps_microseconds = future_date_microseconds and not cf_units_legacy
    expected_microseconds = n_microseconds if keeps_microseconds else 0

    result_microseconds = np.array([date.microsecond for date in dates])
    assert_array_equal(result_microseconds, expected_microseconds)


def test_roundup(time_coord, future_date_microseconds):
    """Check that half-second offsets round up to the next second in legacy mode."""
    n_microseconds, coord = time_coord
    dates = coord.units.num2date(coord.points)

    # Legacy cf-units versions round microseconds and ignore the future flag.
    legacy_rounding = cf_units_legacy or not future_date_microseconds

    expected_seconds = np.floor(coord.points)
    if legacy_rounding and n_microseconds >= 500000:
        expected_seconds += 1

    result_seconds = np.array([date.second for date in dates])
    assert_array_equal(result_seconds, expected_seconds)
Loading