Skip to content

Commit

Permalink
Ensure inputs of cosine of solar zenith angle are chunked when needed (
Browse files Browse the repository at this point in the history
…#1542)

<!--Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [x] This PR addresses an already opened issue (for bug fixes /
features)
    - This PR fixes #1536
- [x] Tests for the changes have been added (for bug fixes / features)
- [ ] (If applicable) Documentation has been added / updated (for bug
fixes / features)
- [x] CHANGES.rst has been updated (with summary of main changes)
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added

### What kind of change does this PR introduce?

* New function `xc.core.utils._chunk_like` to chunk a list of inputs
according to one chunk dictionary. It also circumvents
pydata/xarray#6204 by recreating DataArrays
that were obtained from dimension coordinates.
* Generalization of `uses_dask` so it can accept a list of objects.
* Usage of `_chunk_like` to ensure the inputs of
`cosine_of_solar_zenith_angle` are chunked when needed, in
`mean_radiant_temperature` and `potential_evapotranspiration`.

The effect of this is simply that the `cosine_of_solar_zenith_angle`
will be performed on blocks of the same size as in the original data,
even though its inputs (the dimension coordinate) did not carry that
information. Before this PR, the calculation was done as a single block,
of the same size as the full array.

### Does this PR introduce a breaking change?
No.

### Other information:
Dask might warn something like `PerformanceWarning: Increasing number of
chunks by factor of NN`. Where NN should be the number of chunks along
the `lat` dimension, if present. That's exactly what we want, so it's OK.
  • Loading branch information
aulemahal authored Nov 30, 2023
2 parents 6efc676 + ce2c565 commit cf4bf5a
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Bug fixes
^^^^^^^^^
* Fixed a bug with ``n_escore=-1`` in ``xclim.sdba.adjustment.NpdfTransform`` (:issue:`1515`, :pull:`1515`).
* In the documentaion, fix the tooltips in the indicator search results (:issue:`1524`, :pull:`1527`).
* If chunked inputs are passed to indicators ``mean_radiant_temperature`` and ``potential_evapotranspiration``, subcalculations of the solar angle will also use the same chunks, instead of a single one of the same size as the data (:issue:`1536`, :pull:`1542`).

Internal changes
^^^^^^^^^^^^^^^^
Expand Down
17 changes: 17 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
import xarray as xr

from xclim.core.utils import (
_chunk_like,
ensure_chunk_size,
nan_calc_percentiles,
walk_map,
wrapped_partial,
)
from xclim.testing.helpers import test_timeseries as _test_timeseries


def test_walk_map():
Expand Down Expand Up @@ -110,3 +112,18 @@ def test_calc_perc_partial_nan(self):
# The expected is from R `quantile(arr, 0.5, type=8, na.rm = TRUE)`
# Note that scipy mquantiles would give a different result here
assert res[()] == 42.0


def test_chunk_like():
    # Build a (time, lat) array by stacking ten copies of a 100-step series
    # along a new dimension coordinate `lat`.
    series = _test_timeseries(np.zeros(100), "tas")
    lat_coord = xr.DataArray(np.arange(10), dims=("lat",), name="lat")
    da = xr.concat([series] * 10, lat_coord)

    # `lat` is a dimension coordinate, so xarray stores it as an IndexVariable,
    # which cannot be chunked directly — _chunk_like must handle that.
    assert isinstance(da.lat.variable, xr.core.variable.IndexVariable)

    time_chunked, lat_chunked = _chunk_like(
        da.time, da.lat, chunks={"time": 10, "lat": 1}
    )
    assert time_chunked.chunks[0] == (10,) * 10
    assert lat_chunked.chunks[0] == (1,) * 10
32 changes: 30 additions & 2 deletions xclim/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,17 +275,22 @@ def ensure_chunk_size(da: xr.DataArray, **minchunks: dict[str, int]) -> xr.DataA
return da


def uses_dask(da: xr.DataArray) -> bool:
def uses_dask(*das: xr.DataArray | xr.Dataset) -> bool:
"""Evaluate whether dask is installed and array is loaded as a dask array.
Parameters
----------
da: xr.DataArray
das: xr.DataArray or xr.Dataset
DataArrays or Datasets to check.
Returns
-------
bool
True if any of the passed objects is using dask.
"""
if len(das) > 1:
return any([uses_dask(da) for da in das])
da = das[0]
if isinstance(da, xr.DataArray) and isinstance(da.data, dsk.Array):
return True
if isinstance(da, xr.Dataset) and any(
Expand Down Expand Up @@ -867,3 +872,26 @@ def is_percentile_dataarray(source: xr.DataArray) -> bool:
and source.attrs.get("climatology_bounds", None) is not None
and ("quantile" in source.coords or "percentiles" in source.coords)
)


def _chunk_like(*inputs: xr.DataArray | xr.Dataset, chunks: dict[str, int] | None = None):
    """(Re-)chunk inputs according to a single chunking dictionary.

    Will also ensure passed inputs are not IndexVariable types, so that they can
    be chunked (works around pydata/xarray#6204 by recreating DataArrays that
    were obtained from dimension coordinates).

    Parameters
    ----------
    *inputs : xr.DataArray or xr.Dataset
        Objects to chunk. Non-xarray objects are passed through unchanged.
    chunks : dict, optional
        Mapping from dimension name to chunk size. For each input, only the
        dimensions it actually has are used. If None or empty, the inputs are
        returned unchanged.

    Returns
    -------
    tuple
        The inputs, chunked along the requested dimensions where applicable.
    """
    if not chunks:
        # No chunking requested: return the inputs untouched.
        return tuple(inputs)

    outputs = []
    for da in inputs:
        if isinstance(da, xr.DataArray) and isinstance(da.variable, xr.IndexVariable):
            # Dimension coordinates are backed by IndexVariable, which cannot
            # be chunked (pydata/xarray#6204): rebuild as a plain DataArray.
            da = xr.DataArray(da, dims=da.dims, coords=da.coords, name=da.name)
        if not isinstance(da, (xr.DataArray, xr.Dataset)):
            # Leave non-xarray objects (e.g. scalars) as-is.
            outputs.append(da)
        else:
            # Only apply chunk sizes for dimensions present on this object.
            outputs.append(da.chunk({d: c for d, c in chunks.items() if d in da.dims}))
    return tuple(outputs)
26 changes: 21 additions & 5 deletions xclim/indices/_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,7 +1378,9 @@ def potential_evapotranspiration(
tasmin = convert_units_to(tasmin, "degF")
tasmax = convert_units_to(tasmax, "degF")

re = extraterrestrial_solar_radiation(tasmin.time, lat)
re = extraterrestrial_solar_radiation(
tasmin.time, lat, chunks=tasmin.chunksizes
)
re = convert_units_to(re, "cal cm-2 day-1")

# Baier et Robertson(1965) formula
Expand All @@ -1397,7 +1399,9 @@ def potential_evapotranspiration(

lv = 2.5 # MJ/kg

ra = extraterrestrial_solar_radiation(tasmin.time, lat)
ra = extraterrestrial_solar_radiation(
tasmin.time, lat, chunks=tasmin.chunksizes
)
ra = convert_units_to(ra, "MJ m-2 d-1")

# Hargreaves and Samani (1985) formula
Expand All @@ -1415,7 +1419,7 @@ def potential_evapotranspiration(
tasK = convert_units_to(tas, "K")

ext_rad = extraterrestrial_solar_radiation(
tas.time, lat, solar_constant="1367 W m-2"
tas.time, lat, solar_constant="1367 W m-2", chunks=tas.chunksizes
)
latentH = 4185.5 * (751.78 - 0.5655 * tasK)
radDIVlat = ext_rad / latentH
Expand Down Expand Up @@ -1972,12 +1976,24 @@ def mean_radiant_temperature(

if stat == "sunlit":
csza = cosine_of_solar_zenith_angle(
dates, dec, lat, lon=lon, stat="average", sunlit=True
dates,
dec,
lat,
lon=lon,
stat="average",
sunlit=True,
chunks=rsds.chunksizes,
)
elif stat == "instant":
tc = time_correction_for_solar_angle(dates)
csza = cosine_of_solar_zenith_angle(
dates, dec, lat, lon=lon, time_correction=tc, stat="instant"
dates,
dec,
lat,
lon=lon,
time_correction=tc,
stat="instant",
chunks=rsds.chunksizes,
)
else:
raise NotImplementedError(
Expand Down
16 changes: 14 additions & 2 deletions xclim/indices/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
get_calendar,
)
from xclim.core.units import convert_units_to
from xclim.core.utils import Quantified
from xclim.core.utils import Quantified, _chunk_like


def _wrap_radians(da):
Expand Down Expand Up @@ -199,6 +199,7 @@ def cosine_of_solar_zenith_angle(
time_correction: xr.DataArray = None,
stat: str = "average",
sunlit: bool = False,
chunks: dict[str, int] | None = None,
) -> xr.DataArray:
"""Cosine of the solar zenith angle.
Expand Down Expand Up @@ -230,6 +231,9 @@ def cosine_of_solar_zenith_angle(
sunlit: bool
If True, only the sunlit part of the interval is considered in the integral or average.
Does nothing if stat is "instant".
chunks : dictionary
When `time`, `lat` and `lon` originate from coordinates of a large chunked dataset, this dataset's chunking
can be passed here to ensure the computation is also chunked.
Returns
-------
Expand All @@ -249,6 +253,8 @@ def cosine_of_solar_zenith_angle(
declination = convert_units_to(declination, "rad")
lat = _wrap_radians(convert_units_to(lat, "rad"))
lon = convert_units_to(lon, "rad")
declination, lat, lon = _chunk_like(declination, lat, lon, chunks=chunks)

S_IN_D = 24 * 3600

if len(time) < 3 or xr.infer_freq(time) == "D":
Expand Down Expand Up @@ -357,6 +363,7 @@ def extraterrestrial_solar_radiation(
lat: xr.DataArray,
solar_constant: Quantified = "1361 W m-2",
method="spencer",
chunks: dict[str, int] | None = None,
) -> xr.DataArray:
"""Extraterrestrial solar radiation.
Expand All @@ -375,6 +382,9 @@ def extraterrestrial_solar_radiation(
method : {'spencer', 'simple'}
Which method to use when computing the solar declination and the eccentricity
correction factor. See :py:func:`solar_declination` and :py:func:`eccentricity_correction_factor`.
chunks : dictionary
When `times` and `lat` originate from coordinates of a large chunked dataset, passing the dataset's chunks here
will ensure the computation is chunked as well.
Returns
-------
Expand All @@ -391,7 +401,9 @@ def extraterrestrial_solar_radiation(
return (
gsc
* rad_to_day
* cosine_of_solar_zenith_angle(times, ds, lat, stat="integral", sunlit=True)
* cosine_of_solar_zenith_angle(
times, ds, lat, stat="integral", sunlit=True, chunks=chunks
)
* dr
).assign_attrs(units="J m-2 d-1")

Expand Down

0 comments on commit cf4bf5a

Please sign in to comment.