Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove sum rule from conversion #281

Merged
merged 3 commits into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 11 additions & 51 deletions primap2/_convert.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import copy
import typing
from collections.abc import Hashable

import climate_categories
Expand All @@ -18,7 +17,6 @@
dim: Hashable | str,
*,
conversion: climate_categories.Conversion,
sum_rule: typing.Literal["intensive", "extensive"] | None = None,
input_weights: xr.DataArray | None = None,
output_weights: xr.DataArray | None = None,
auxiliary_dimensions: dict[str, str] | None = None,
Expand All @@ -28,7 +26,11 @@
Maps the given dimension from one categorization (terminology) into another.
Fetches the rules to do the mapping from the climate_categories package, and
therefore will only work if there are conversions rules to convert from the
current categorization to the new categorization.
current categorization to the new categorization. The input data must always be
extensive (like, for example, total emissions in a year subdivided into multiple
sectoral categories). Handling of intensive data (like, for example, average
per-person emissions in a year subdivided into different territorial entities)
is not supported.

Parameters
----------
Expand All @@ -38,13 +40,6 @@
The conversion rules that describe the conversion from the old to the new
categorization. Contains ``climate_categories.Categorization``
object for old and new categorization.
sum_rule : ``extensive``, ``intensive``, or None (default)
If data of categories has to be summed up or divided, we need information
whether the quantity measured is extensive (like, for example, total
emissions in a year subdivided into multiple sectoral categories) or
intensive (like, for example, average per-person emissions in a year
subdivided into different territorial entities). By default (None), a
warning is issued if data has to be summed up or divided.
input_weights : xr.DataArray, optional
If data in input categories has to be summed up and the sum_rule is
``intensive``, weights for the input categories are required.
Expand Down Expand Up @@ -81,8 +76,6 @@
categorization.
"""

check_valid_sum_rule_types(sum_rule)

auxiliary_dimensions = prepare_auxiliary_dimensions(conversion, auxiliary_dimensions)

dim_name, old_categorization = extract_categorization_from_dim(dim)
Expand Down Expand Up @@ -121,7 +114,6 @@
already_converted_categories=converted_categories,
category=category.item(),
conversion=conversion,
sum_rule=sum_rule,
auxiliary_dimensions=auxiliary_dimensions,
input_weights=input_weights,
output_weights=output_weights,
Expand All @@ -138,7 +130,6 @@
already_converted_categories: list[climate_categories.Category],
category: climate_categories.Category,
conversion: climate_categories.Conversion,
sum_rule: str | None,
auxiliary_dimensions: dict[climate_categories.Categorization, str] | None,
input_weights: xr.DataArray | None = None,
output_weights: xr.DataArray | None = None,
Expand All @@ -162,8 +153,6 @@
The category from the new dimension which should be filled.
conversion: climate_categories.Conversion
The conversion to use to compute the values for the given category.
sum_rule: str, optional
See docstring of `convert`.
auxiliary_dimensions:
See docstring of `convert`.
input_weights: xr.DataArray, optional
Expand Down Expand Up @@ -223,7 +212,6 @@
rule=rule,
operation_type="input",
selection=input_selection,
sum_rule=sum_rule,
weights=input_weights,
)
effective_output_weights = derive_weights(
Expand All @@ -232,7 +220,6 @@
rule=rule,
operation_type="output",
selection=output_selection,
sum_rule=sum_rule,
weights=output_weights,
)
except WeightingInfoMissing as err:
Expand Down Expand Up @@ -312,16 +299,6 @@
return climate_categories.cats[cat]


def check_valid_sum_rule_types(sum_rule: str | None):
"""Checks if the sum_rule is either "intensive", "extensive", or None.

Raises a ValueError if an invalid sum_rule is used."""
if sum_rule not in (None, "extensive", "intensive"):
raise ValueError(
f"if defined, sum_rule must be either 'extensive' or 'intensive', not" f" {sum_rule}"
)


def initialize_empty_converted_da(
*,
old_da: xr.DataArray,
Expand Down Expand Up @@ -412,7 +389,7 @@
"""Convert dictionary mapping categories to factors into xarray-compatible objects.

Using the xarray objects ensures that in subsequent calculations, everything
will cleanly multiply reagardless of the dimensionality of the data.
will cleanly multiply regardless of the dimensionality of the data.

Parameters
----------
Expand Down Expand Up @@ -472,7 +449,6 @@
dim: str,
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `derive_weights` function will change again when I remove `input_weights` and `output_weights`.

category: climate_categories.Category,
rule: climate_categories.ConversionRule,
sum_rule: str | None,
operation_type: str,
weights: xr.DataArray | None,
selection: dict[str, list[str]],
Expand All @@ -487,13 +463,6 @@
Category which should be derived.
rule: climate_categories.ConversionRule
Rule that should be used to derive the category.
sum_rule : ``extensive``, ``intensive``, or None (default)
If data of categories has to be summed up or divided, we need information
whether the quantity measured is extensive (like, for example, total
emissions in a year subdivided into multiple sectoral categories) or
intensive (like, for example, average per-person emissions in a year
subdivided into different territorial entities). By default (None), a
warning is issued if data has to be summed up or divided.
operation_type: ``input`` or ``output``
If weights for the source data (input) or the result data (output) should
be derived.
Expand All @@ -510,38 +479,29 @@
"""
if operation_type == "input":
operation_verb = "sum up"
trivial_sum_rule = "extensive"
nontrivial_sum_rule = "intensive"
rule_cardinality = rule.cardinality_a
else:
operation_verb = "split"
trivial_sum_rule = "intensive"
nontrivial_sum_rule = "extensive"
rule_cardinality = rule.cardinality_b

# just one category or trivial sum rule, so no weights required
if rule_cardinality == "one" or sum_rule == trivial_sum_rule:
if rule_cardinality == "one" or operation_type == "input":
return 1.0
if sum_rule == nontrivial_sum_rule:
if operation_type == "output":
if weights is None:
raise WeightingInfoMissing(
category=category,
rule=rule,
message=f"We need to {operation_verb} multiple categories with"
f" sum_rule={nontrivial_sum_rule}, but no {operation_type}_weights are"
f" but no {operation_type}_weights are"
f" specified.",
)
effective_weights = weights.loc[selection]
# normalize so it is actually a weight, not a factor
return effective_weights / effective_weights.sum(dim=dim)

raise WeightingInfoMissing(
category=category,
rule=rule,
message=f"We need to {operation_verb} multiple categories, but the sum_rule is"
f" not specified. Rule can only be used if sum_rule={trivial_sum_rule!r} or"
f" sum_rule={nontrivial_sum_rule} and {operation_type}_weights are"
f" specified.",
raise NotImplementedError(

Check warning on line 503 in primap2/_convert.py

View check run for this annotation

Codecov / codecov/patch

primap2/_convert.py#L503

Added line #L503 was not covered by tests
f"operation_type must be either input or output. Got {operation_type}"
)


Expand Down
4 changes: 0 additions & 4 deletions primap2/tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,11 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
da.pr.convert(
dim="category",
conversion=conversion,
sum_rule="extensive",
)

result = da.pr.convert(
dim="category",
conversion=conversion,
sum_rule="extensive",
auxiliary_dimensions={"gas": "source (gas)"},
)

Expand Down Expand Up @@ -133,7 +131,6 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
result = da.pr.convert(
dim="category",
conversion=conv,
sum_rule="extensive",
auxiliary_dimensions={"gas": "source (gas)"},
)

Expand Down Expand Up @@ -188,7 +185,6 @@ def test_custom_conversion_and_two_custom_categorisations(empty_ds):
result = da.pr.convert(
dim="category",
conversion=conv,
sum_rule="extensive",
)

# category name includes B - the target categorisation
Expand Down