Skip to content

Commit

Permalink
Merge pull request #278 from primap-community/fix_downscaling
Browse files Browse the repository at this point in the history
Fix downscaling functionality
  • Loading branch information
JGuetschow authored Nov 20, 2024
2 parents e1689e4 + 0518e9c commit dad301b
Show file tree
Hide file tree
Showing 3 changed files with 329 additions and 96 deletions.
1 change: 1 addition & 0 deletions changelog/278.fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix problems with downscaling where data is zero for all basket contents.
184 changes: 135 additions & 49 deletions primap2/_downscale.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from collections.abc import Hashable, Sequence

import numpy as np
import pandas as pd
import xarray as xr

from ._accessor_base import BaseDataArrayAccessor, BaseDatasetAccessor
Expand Down Expand Up @@ -37,6 +39,11 @@ def downscale_timeseries(
used to downscale the basket to its contents, which is used to fill gaps in the
timeseries of the basket contents.
If the data to downscale contains only 0 and NaN the result will be all
zero timeseries. If the data is only for individual basket and basket_content
values, the resulting downscaled data points are zero but the shares for other years
are not influenced
Parameters
----------
dim: str
Expand Down Expand Up @@ -104,6 +111,35 @@ def downscale_timeseries(
"To continue regardless, set check_consistency=False."
)

# treat zeros
basket_both_zero = basket_da.where((basket_da == 0) & (basket_sum == 0))
basket_sum_zero = basket_da.where((basket_da != 0) & (basket_sum == 0))
if not basket_sum_zero.isnull().all():
# this can only happen if check consistency = False,
# so we could also remove it and say whoever switches it off has
# to deal with the consequences
error_message = generate_error_message(basket_sum_zero)
raise ValueError(f"pr.downscale_timeseries {error_message}")

any_nonzero = basket_sum.where(basket_sum != 0).notnull().any()

# treat the case where all data is zero or NaN
if (not basket_both_zero.isnull().all()) & (not any_nonzero):
# create fake basket_contents_da and basket_sum which contain
# 1 for all data points where NaNs are either in the sum or the basket
# this will later lead to equal shares for the NaNs.
units = basket_contents_da.pint.units
basket_da_squeeze = basket_da.drop(dim)
basket_contents_da = basket_contents_da.where(
(basket_da_squeeze != 0) | (basket_sum != 0), 1 * units
)
basket_sum = basket_contents_da.pr.sum(
dim=dim,
skipna=skipna,
min_count=1,
skipna_evaluation_dims=skipna_evaluation_dims,
)

# inter- and extrapolate
shares: xr.DataArray = (
(basket_contents_da / basket_sum)
Expand All @@ -114,6 +150,11 @@ def downscale_timeseries(
.bfill(dim="time")
)

# treat the case where there are zero values in basket_sum and basket_da but also
# non-zero data
if (not basket_both_zero.isnull().all()) & any_nonzero:
shares = shares.where((basket_da != 0) | (basket_sum != 0), 0)

downscaled: xr.DataArray = basket_da * shares

return self._da.fillna(downscaled)
Expand Down Expand Up @@ -146,6 +187,11 @@ def downscale_timeseries(
used to downscale the basket to its contents, which is used to fill gaps in the
timeseries of the basket contents.
If the data to downscale contains only 0 and NaN the result will be all
zero timeseries. If the data is only for individual basket and basket_content
values, the resulting downscaled data points are zero but the shares for other years
are not influenced
Parameters
----------
dim: str
Expand Down Expand Up @@ -193,51 +239,20 @@ def downscale_timeseries(
"Use ds.pr.remove_processing_info()."
)

ds_sel = select_no_scalar_dimension(self._ds, sel)

basket_contents_ds = ds_sel.loc[{dim: basket_contents}]
basket_ds = ds_sel.loc[{dim: basket}]

if skipna_evaluation_dims is not None:
if skipna:
raise ValueError(
"Only one of 'skipna' and 'skipna_evaluation_dims' may be supplied, not"
" both."
)
else:
skipna = None

basket_sum = basket_contents_ds.pr.sum(
dim=dim,
skipna=skipna,
min_count=1,
skipna_evaluation_dims=skipna_evaluation_dims,
)

if check_consistency:
deviation = abs(basket_ds / basket_sum - 1)
devmax = deviation.to_array().max()
if devmax > tolerance:
raise ValueError(
f"Sum of the basket_contents {basket_contents!r} deviates"
f" {devmax * 100} % from the basket"
f" {basket!r}, which is more than the allowed 1 %. "
"To continue regardless, set check_consistency=False."
)

# inter- and extrapolate
shares: xr.Dataset = (
(basket_contents_ds / basket_sum)
.pint.to({x: "" for x in basket_contents_ds.keys()})
.pint.dequantify()
.interpolate_na(dim="time", method="linear")
.ffill(dim="time")
.bfill(dim="time")
)

downscaled: xr.Dataset = basket_ds * shares
downscaled = self._ds.copy()
for var in self._ds.data_vars:
downscaled[var] = downscaled[var].pr.downscale_timeseries(
dim=dim,
basket=basket,
basket_contents=basket_contents,
check_consistency=check_consistency,
sel=sel,
skipna_evaluation_dims=skipna_evaluation_dims,
skipna=skipna,
tolerance=tolerance,
)

return self._ds.fillna(downscaled)
return downscaled

def downscale_gas_timeseries(
self,
Expand Down Expand Up @@ -265,6 +280,11 @@ def downscale_gas_timeseries(
converted back afterwards; for both conversions the gwp conversions of the
basket are used.
If the data to downscale contains only 0 and NaN the result will be all
zero timeseries. If the data is only for individual basket and basket_content
values, the resulting downscaled data points are zero but the shares for other years
are not influenced
Parameters
----------
basket: str
Expand Down Expand Up @@ -306,10 +326,10 @@ def downscale_gas_timeseries(
ds_sel = select_no_scalar_dimension(self._ds, sel)

basket_contents_converted = xr.Dataset()
da_basket = ds_sel[basket]
basket_da = ds_sel[basket]
for var in basket_contents:
da: xr.DataArray = ds_sel[var]
basket_contents_converted[var] = da.pr.convert_to_gwp_like(like=da_basket)
basket_contents_converted[var] = da.pr.convert_to_gwp_like(like=basket_da)

if skipna_evaluation_dims is not None:
if skipna:
Expand All @@ -328,7 +348,7 @@ def downscale_gas_timeseries(
)

if check_consistency:
deviation = abs(da_basket / basket_sum - 1)
deviation = abs(basket_da / basket_sum - 1)
devmax = deviation.max().item()
if devmax > tolerance:
raise ValueError(
Expand All @@ -340,6 +360,31 @@ def downscale_gas_timeseries(
"To continue regardless, set check_consistency=False."
)

# treat zeros
basket_both_zero = basket_da.where((basket_da == 0) & (basket_sum == 0))
basket_sum_zero = basket_da.where((basket_da != 0) & (basket_sum == 0))
if not basket_sum_zero.isnull().all():
# this can only happen if check consistency = False,
# so we could also remove it and say whoever switches it off has
# to deal with the consequences
error_message = generate_error_message(basket_sum_zero)
raise ValueError(f"pr.downscale_gas_timeseries {error_message}")

any_nonzero = basket_sum.where(basket_sum != 0).notnull().any()

# treat the case where all data is zero or NaN
if (not basket_both_zero.isnull().all()) & (not any_nonzero):
unit_content = str(basket_da.pint.units)
basket_contents_converted = basket_contents_converted.where(
(basket_da != 0) | (basket_sum != 0), 1 * ureg(unit_content)
)
basket_sum = basket_contents_converted.pr.sum(
dim="entity",
skipna=skipna,
min_count=1,
skipna_evaluation_dims=skipna_evaluation_dims,
)

# inter- and extrapolate
shares = (
(basket_contents_converted / basket_sum)
Expand All @@ -350,14 +395,55 @@ def downscale_gas_timeseries(
.bfill(dim="time")
)

downscaled = da_basket * shares
# treat the case where there are zero values in basket_sum and basket_da but also
# non-zero data
if (not basket_both_zero.isnull().all()) & any_nonzero:
shares = shares.where((basket_da != 0) | (basket_sum != 0), 0)

downscaled = basket_da * shares

downscaled_converted = xr.Dataset()

with ureg.context(da_basket.attrs["gwp_context"]):
with ureg.context(basket_da.attrs["gwp_context"]):
for var in basket_contents:
downscaled_converted[var] = ds_sel[var].fillna(
downscaled[var].pint.to(ds_sel[var].pint.units)
)

return self._ds.pr.fillna(downscaled_converted)


def generate_error_message(da_error: xr.DataArray) -> str:
    """Build the error message reported for zero-sum basket data.

    Modelled on generate_log_message used by pr.merge.

    Approach: every dimension of length one is collapsed into a compact
    ``dim=value`` description, and whatever remains is rendered as a pandas
    DataFrame for readable printing.
    """
    formatted_scalars: list[str] = []
    for dimension in da_error.dims:
        coord = da_error[dimension]
        if len(coord) != 1:
            continue
        if pd.api.types.is_datetime64_any_dtype(coord):
            timestamp = pd.Timestamp(coord[0].values)
            # Yearly (or coarser) data is the common case, so show only the
            # year when the timestamp falls exactly on January 1st.
            if timestamp == pd.Timestamp(year=timestamp.year, month=1, day=1):
                formatted_scalars.append(f"{dimension}={timestamp.strftime('%Y')}")
            else:
                formatted_scalars.append(f"{dimension}={timestamp!s}")
        else:
            formatted_scalars.append(f"{dimension}={coord.item()}")
    scalar_description = ", ".join(formatted_scalars)

    # Drop the length-1 dimensions and the pint units before tabulating.
    squeezed = da_error.squeeze(drop=True).pint.dequantify()
    if np.ndim(squeezed.data) == 0:
        # Everything was scalar; there is no table to print.
        table_str = ""
    else:
        table_str = squeezed.to_dataframe().dropna().to_string()

    return (
        "error: found zero basket content sum for non-zero basket data"
        f" for {scalar_description}:\n"
        f"({da_error.name})\n" + table_str
    )
Loading

0 comments on commit dad301b

Please sign in to comment.