Merge pull request #281 from primap-community/remove-sum-rule

primap-community · Nov 1, 2024 · a2da6d6 · a2da6d6
2 parents ce5362a + bd02bd3
commit a2da6d6
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 55 deletions.
diff --git a/primap2/_convert.py b/primap2/_convert.py
@@ -1,5 +1,4 @@
 import copy
-import typing
 from collections.abc import Hashable
 
 import climate_categories
@@ -18,7 +17,6 @@ def convert(
         dim: Hashable | str,
         *,
         conversion: climate_categories.Conversion,
-        sum_rule: typing.Literal["intensive", "extensive"] | None = None,
         input_weights: xr.DataArray | None = None,
         output_weights: xr.DataArray | None = None,
         auxiliary_dimensions: dict[str, str] | None = None,
@@ -28,7 +26,11 @@ def convert(
         Maps the given dimension from one categorization (terminology) into another.
         Fetches the rules to do the mapping from the climate_categories package, and
         therefore will only work if there are conversions rules to convert from the
-        current categorization to the new categorization.
+        current categorization to the new categorization. The input data must always be
+        extensive (like, for example, total emissions in a year subdivided into multiple
+        sectoral categories). Handling of intensive data (like, for example, average
+        per-person emissions in a year subdivided into different territorial entities)
+        is not supported.
 
         Parameters
         ----------
@@ -38,13 +40,6 @@ def convert(
             The conversion rules that describe the conversion from the old to the new
             categorization. Contains ``climate_categories.Categorization``
             object for old and new categorization.
-        sum_rule : ``extensive``, ``intensive``, or None (default)
-            If data of categories has to be summed up or divided, we need information
-            whether the quantity measured is extensive (like, for example, total
-            emissions in a year subdivided into multiple sectoral categories) or
-            intensive (like, for example, average per-person emissions in a year
-            subdivided into different territorial entities). By default (None), a
-            warning is issued if data has to be summed up or divided.
         input_weights : xr.DataArray, optional
             Weights for the input categories. As the data is always treated as
             extensive, summing up input categories does not require weights.
@@ -81,8 +76,6 @@ def convert(
             categorization.
         """
 
-        check_valid_sum_rule_types(sum_rule)
-
         auxiliary_dimensions = prepare_auxiliary_dimensions(conversion, auxiliary_dimensions)
 
         dim_name, old_categorization = extract_categorization_from_dim(dim)
@@ -121,7 +114,6 @@ def convert(
                 already_converted_categories=converted_categories,
                 category=category.item(),
                 conversion=conversion,
-                sum_rule=sum_rule,
                 auxiliary_dimensions=auxiliary_dimensions,
                 input_weights=input_weights,
                 output_weights=output_weights,
@@ -138,7 +130,6 @@ def _fill_category(
         already_converted_categories: list[climate_categories.Category],
         category: climate_categories.Category,
         conversion: climate_categories.Conversion,
-        sum_rule: str | None,
         auxiliary_dimensions: dict[climate_categories.Categorization, str] | None,
         input_weights: xr.DataArray | None = None,
         output_weights: xr.DataArray | None = None,
@@ -162,8 +153,6 @@ def _fill_category(
             The category from the new dimension which should be filled.
         conversion: climate_categories.Conversion
             The conversion to use to compute the values for the given category.
-        sum_rule: str, optional
-            See docstring of `convert`.
         auxiliary_dimensions:
             See docstring of `convert`.
         input_weights: xr.DataArray, optional
@@ -223,7 +212,6 @@ def _fill_category(
                     rule=rule,
                     operation_type="input",
                     selection=input_selection,
-                    sum_rule=sum_rule,
                     weights=input_weights,
                 )
                 effective_output_weights = derive_weights(
@@ -232,7 +220,6 @@ def _fill_category(
                     rule=rule,
                     operation_type="output",
                     selection=output_selection,
-                    sum_rule=sum_rule,
                     weights=output_weights,
                 )
             except WeightingInfoMissing as err:
@@ -312,16 +299,6 @@ def ensure_categorization_instance(
     return climate_categories.cats[cat]
 
 
-def check_valid_sum_rule_types(sum_rule: str | None):
-    """Checks if the sum_rule is either "intensive", "extensive", or None.
-
-    Raises a ValueError if an invalid sum_rule is used."""
-    if sum_rule not in (None, "extensive", "intensive"):
-        raise ValueError(
-            f"if defined, sum_rule must be either 'extensive' or 'intensive', not" f" {sum_rule}"
-        )
-
-
 def initialize_empty_converted_da(
     *,
     old_da: xr.DataArray,
@@ -412,7 +389,7 @@ def factors_categories_to_xarray(
     """Convert dictionary mapping categories to factors into xarray-compatible objects.
 
     Using the xarray objects ensures that in subsequent calculations, everything
-    will cleanly multiply reagardless of the dimensionality of the data.
+    will cleanly multiply regardless of the dimensionality of the data.
 
     Parameters
     ----------
@@ -472,7 +449,6 @@ def derive_weights(
     dim: str,
     category: climate_categories.Category,
     rule: climate_categories.ConversionRule,
-    sum_rule: str | None,
     operation_type: str,
     weights: xr.DataArray | None,
     selection: dict[str, list[str]],
@@ -487,13 +463,6 @@ def derive_weights(
         Category which should be derived.
     rule: climate_categories.ConversionRule
         Rule that should be used to derive the category.
-    sum_rule : ``extensive``, ``intensive``, or None (default)
-        If data of categories has to be summed up or divided, we need information
-        whether the quantity measured is extensive (like, for example, total
-        emissions in a year subdivided into multiple sectoral categories) or
-        intensive (like, for example, average per-person emissions in a year
-        subdivided into different territorial entities). By default (None), a
-        warning is issued if data has to be summed up or divided.
     operation_type: ``input`` or ``output``
         If weights for the source data (input) or the result data (output) should
         be derived.
@@ -510,38 +479,29 @@ def derive_weights(
     """
     if operation_type == "input":
         operation_verb = "sum up"
-        trivial_sum_rule = "extensive"
-        nontrivial_sum_rule = "intensive"
         rule_cardinality = rule.cardinality_a
     else:
         operation_verb = "split"
-        trivial_sum_rule = "intensive"
-        nontrivial_sum_rule = "extensive"
         rule_cardinality = rule.cardinality_b
 
     # just one category or summing up extensive input data, so no weights required
-    if rule_cardinality == "one" or sum_rule == trivial_sum_rule:
+    if rule_cardinality == "one" or operation_type == "input":
         return 1.0
-    if sum_rule == nontrivial_sum_rule:
+    if operation_type == "output":
         if weights is None:
             raise WeightingInfoMissing(
                 category=category,
                 rule=rule,
                 message=f"We need to {operation_verb} multiple categories with"
-                f" sum_rule={nontrivial_sum_rule}, but no {operation_type}_weights are"
+                f" but no {operation_type}_weights are"
                 f" specified.",
             )
         effective_weights = weights.loc[selection]
         # normalize so it is actually a weight, not a factor
         return effective_weights / effective_weights.sum(dim=dim)
 
-    raise WeightingInfoMissing(
-        category=category,
-        rule=rule,
-        message=f"We need to {operation_verb} multiple categories, but the sum_rule is"
-        f" not specified. Rule can only be used if sum_rule={trivial_sum_rule!r} or"
-        f" sum_rule={nontrivial_sum_rule} and {operation_type}_weights are"
-        f" specified.",
+    raise NotImplementedError(
+        f"operation_type must be either input or output. Got {operation_type}"
     )
 
 

diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
@@ -56,13 +56,11 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
         da.pr.convert(
             dim="category",
             conversion=conversion,
-            sum_rule="extensive",
         )
 
     result = da.pr.convert(
         dim="category",
         conversion=conversion,
-        sum_rule="extensive",
         auxiliary_dimensions={"gas": "source (gas)"},
     )
 
@@ -133,7 +131,6 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
     result = da.pr.convert(
         dim="category",
         conversion=conv,
-        sum_rule="extensive",
         auxiliary_dimensions={"gas": "source (gas)"},
     )
 
@@ -188,7 +185,6 @@ def test_custom_conversion_and_two_custom_categorisations(empty_ds):
     result = da.pr.convert(
         dim="category",
         conversion=conv,
-        sum_rule="extensive",
     )
 
     # category name includes B - the target categorisation