Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove input and output weights from convert function #282

Merged
merged 6 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 13 additions & 63 deletions primap2/_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
dim: Hashable | str,
*,
conversion: climate_categories.Conversion,
input_weights: xr.DataArray | None = None,
output_weights: xr.DataArray | None = None,
auxiliary_dimensions: dict[str, str] | None = None,
) -> xr.DataArray:
"""Convert the data along the given dimension into the new categorization.
Expand All @@ -40,26 +38,6 @@
The conversion rules that describe the conversion from the old to the new
categorization. Contains ``climate_categories.Categorization``
object for old and new categorization.
input_weights : xr.DataArray, optional
If data in input categories has to be summed up and the sum_rule is
``intensive``, weights for the input categories are required.
The weights can be given in any shape compatible with the DataArray that
is converted, e.g. to give different weights for industrial sectors by
country. However, at least the ``dim`` that is converted needs to be in
``input_weights.dims``.
If no weights are specified but a rule requiring weights is specified
in the conversion rules, a warning is issued and the respective rule is
skipped (probably resulting in more NaNs in the output).
output_weights : xr.DataArray, optional
If data has to be divided into several output categories and the sum_rule is
``extensive``, weights for the output categories are required.
The weights can be given in any shape compatible with the DataArray that
is converted, e.g. to give different weights for industrial sectors by
country. However, at least the ``dim`` that is converted needs to be in
``output_weights.dims``.
If no weights are specified but a rule requiring weights is specified
in the conversion rules, a warning is issued and the respective rule is
skipped (probably resulting in more NaNs in the output).
auxiliary_dimensions : dict[str, str], optional
Mapping of auxiliary categorizations to dimension names used in this
DataArray. In conversions which contain rules which are valid only for
Expand Down Expand Up @@ -115,8 +93,6 @@
category=category.item(),
conversion=conversion,
auxiliary_dimensions=auxiliary_dimensions,
input_weights=input_weights,
output_weights=output_weights,
)
converted_categories += newly_converted_categories

Expand All @@ -131,8 +107,6 @@
category: climate_categories.Category,
conversion: climate_categories.Conversion,
auxiliary_dimensions: dict[climate_categories.Categorization, str] | None,
input_weights: xr.DataArray | None = None,
output_weights: xr.DataArray | None = None,
) -> tuple[list[climate_categories.Category], xr.DataArray]:
"""Return a copy of da with the given category filled by values converted
using the given conversion.
Expand All @@ -155,10 +129,6 @@
The conversion to use to compute the values for the given category.
auxiliary_dimensions:
See docstring of `convert`.
input_weights: xr.DataArray, optional
See docstring of `convert`.
output_weights: xr.DataArray, optional
See docstring of `convert`.

Returns
-------
Expand Down Expand Up @@ -211,16 +181,12 @@
category=category,
rule=rule,
operation_type="input",
selection=input_selection,
weights=input_weights,
)
effective_output_weights = derive_weights(
dim=new_dim,
category=category,
rule=rule,
operation_type="output",
selection=output_selection,
weights=output_weights,
)
except WeightingInfoMissing as err:
logger.warning(str(err))
Expand Down Expand Up @@ -450,8 +416,6 @@
category: climate_categories.Category,
rule: climate_categories.ConversionRule,
operation_type: str,
weights: xr.DataArray | None,
selection: dict[str, list[str]],
) -> xr.DataArray | float:
"""Derive the weights to use for applying a specific rule.

Expand All @@ -466,43 +430,29 @@
operation_type: ``input`` or ``output``
Whether weights for the source data (input) or the result data (output)
should be derived.
weights: xr.DataArray, optional
Weights for the individual categories.
selection: dict[str, list[str]]
Selection derived from the rule.

Returns
-------
factors: float or xr.DataArray
Object which can be multiplied with the input or output DataArray to apply
weights.
"""
# TODO this may change again in the next PR
if operation_type == "input":
operation_verb = "sum up"
rule_cardinality = rule.cardinality_a
else:
operation_verb = "split"
rule_cardinality = rule.cardinality_b

# just one category or trivial sum rule, so no weights required
if rule_cardinality == "one" or operation_type == "input":
return 1.0
if operation_type == "output":
if weights is None:
raise WeightingInfoMissing(
category=category,
rule=rule,
message=f"We need to {operation_verb} multiple categories with"
f" but no {operation_type}_weights are"
f" specified.",
elif operation_type == "output":
if rule.cardinality_b == "one":
return 1.0
else:
raise NotImplementedError(
"Splitting input categories into multiple"
" output categories is currently not supported. "
f"{rule.csv_original_text=}, {category=}"
)
effective_weights = weights.loc[selection]
# normalize so it is actually a weight, not a factor
return effective_weights / effective_weights.sum(dim=dim)

raise NotImplementedError(
f"operation_type must be either input or output. Got {operation_type}"
)
else:
raise NotImplementedError(

Check warning on line 453 in primap2/_convert.py

View check run for this annotation

Codecov / codecov/patch

primap2/_convert.py#L453

Added line #L453 was not covered by tests
f"operation_type must be either input or output. Got {operation_type}"
)


def prepare_auxiliary_dimensions(
Expand Down
1 change: 1 addition & 0 deletions primap2/tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def test_conversion_source_does_not_match_dataset_dimension(empty_ds):
)


@pytest.mark.xfail
def test_convert_ipcc(empty_ds: xr.Dataset):
# build a DA categorized by IPCC1996 and with 1 everywhere so results are easy
# to see
Expand Down