primap-community · crdanielbusch · Nov 7, 2024 · Oct 29, 2024 · Oct 30, 2024 · Oct 31, 2024
diff --git a/primap2/_convert.py b/primap2/_convert.py
@@ -166,6 +166,9 @@ def _fill_category(
             already_converted = set(output_selection[new_dim]).intersection(
                 set(already_converted_categories)
             )
+            # if there are several categories on the target side
+            # we can still convert because it will
+            # create a new category
             if already_converted:
                 logger.warning(
                     f"For category {category!r}, would want to use a "
@@ -175,35 +178,28 @@ def _fill_category(
                 )
                 continue
 
-            try:
-                effective_input_weights = derive_weights(
-                    dim=dim,
-                    category=category,
-                    rule=rule,
-                    operation_type="input",
-                )
-                effective_output_weights = derive_weights(
-                    dim=new_dim,
-                    category=category,
-                    rule=rule,
-                    operation_type="output",
-                )
-            except WeightingInfoMissing as err:
-                logger.warning(str(err))
-                continue
-
             # the left-hand side of the conversion formula summed up
-            lhs = (input_factors * effective_input_weights * self._da.loc[input_selection]).sum(
-                dim=dim
-            )
+            lhs = (input_factors * self._da.loc[input_selection]).sum(dim=dim)
             # the right-hand side of the conversion formula split up
-            rhs = lhs / output_factors / effective_output_weights
-
-            da.loc[output_selection] = rhs
-
-            if not rule.is_restricted:
-                # stop processing rules for this category
+            rhs = lhs / output_factors
+
+            # if there is more than one category on the target side
+            if len(output_selection[new_dim]) > 1:
+                # TODO this leads to very long category names
+                new_category = "M_" + "_".join(output_selection[new_dim])
+                # add newly created category to da
+                new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
+                da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
+                new_output_selection = output_selection.copy()
+                new_output_selection[new_dim] = new_category
+                da.loc[new_output_selection] = rhs.sum(dim=new_dim)
                 return output_selection[new_dim], da
+            else:
+                da.loc[output_selection] = rhs
+
+                if not rule.is_restricted:
+                    # stop processing rules for this category
+                    return output_selection[new_dim], da
 
         logger.debug(
             f"No unrestricted rule to derive data for {category!r} applied, some or "
@@ -394,67 +390,6 @@ def factors_categories_to_xarray(
     return selection, factors
 
 
-class WeightingInfoMissing(ValueError):
-    """Some information to derive weighting factors for a rule is missing."""
-
-    def __init__(
-        self,
-        category: climate_categories.Category,
-        rule: climate_categories.ConversionRule,
-        message: str,
-    ):
-        full_message = (
-            f"Can not derive data for category {category!r} using rule"
-            f" '{rule}': {message} Skipping this rule."
-        )
-        ValueError.__init__(self, full_message)
-
-
-def derive_weights(
-    *,
-    dim: str,
-    category: climate_categories.Category,
-    rule: climate_categories.ConversionRule,
-    operation_type: str,
-) -> xr.DataArray | float:
-    """Derive the weights to use for applying a specific rule.
-
-    Parameters
-    ----------
-    dim: str
-        Dimension which contains the categories.
-    category: climate_categories.Category
-        Category which should be derived.
-    rule: climate_categories.ConversionRule
-        Rule that should be used to derive the category.
-    operation_type: ``input`` or ``output``
-        If weights for the source data (input) or the result data (output) should
-        be derived.
-
-    Returns
-    -------
-    factors: float or xr.DataArray
-        Object which can be multiplied with the input or output DataArray to apply
-        weights.
-    """
-    # TODO this may change again in the next PR
-    if operation_type == "input":
-        return 1.0
-    elif operation_type == "output":
-        if rule.cardinality_b == "one":
-            return 1.0
-        else:
-            raise NotImplementedError(
-                "Splitting input categories into multiple"
-                " output categories is currently not supported. "
-                f"{rule.csv_original_text=}, {category=}"
-            )
-    else:
-        raise NotImplementedError(
-            f"operation_type must be either input or output. Got {operation_type}"
-        )
-
-
 def prepare_auxiliary_dimensions(
     conversion: climate_categories.Conversion,
     auxiliary_dimensions: dict[str, str] | None,

diff --git a/primap2/tests/data/BURDI_conversion.csv b/primap2/tests/data/BURDI_conversion.csv
@@ -17,16 +17,19 @@ BURDI,IPCC2006_PRIMAP,comment
 2.C,2.C
 2.F,2.F
 2.G + 2.D, 2.H
+2.G, 2.H.3
 3,2.D
 4,M.AG
 4.A,3.A.1
 4.B,3.A.2
 4.C,3.C.7
+4.D, 3.C.45.AG
 4.D + 4.C + 4.E + 4.F + 4.G,3.C
 4.E,3.C.1.c
 4.F,3.C.1.b
 4.G,3.C.8
 5,M.LULUCF
+4+5,3
 6,4
 6.A,4.A
 6.B,4.D

diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
@@ -40,7 +40,6 @@ def test_conversion_source_does_not_match_dataset_dimension(empty_ds):
         )
 
 
-@pytest.mark.xfail
 def test_convert_ipcc(empty_ds: xr.Dataset):
     # build a DA categorized by IPCC1996 and with 1 everywhere so results are easy
     # to see
@@ -64,12 +63,44 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
         conversion=conversion,
         auxiliary_dimensions={"gas": "source (gas)"},
     )
-
+    # rule 1 -> 1
     assert (result.pr.loc[{"category": "1"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 2 + 3 -> 2
     assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 1.A.2.f -> 1.A.2.f + 1.A.2.g + 1.A.2.h + 1.A.2.i + 1.A.2.j + 1.A.2.k + 1.A.2.l + 1.A.2.m
+    mcat = "M_1.A.2.f_1.A.2.g_1.A.2.h_1.A.2.i_1.A.2.j_1.A.2.k_1.A.2.l_1.A.2.m"
+    assert (result.pr.loc[{"category": mcat}] == 8.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 4.D for N2O only -> 3.C.4 + 3.C.5
+    mcat = "M_3.C.4_3.C.5"
+    assert (
+        (
+            result.pr.loc[{"category": mcat, "source (gas)": "N2O"}]
+            == 2.0 * primap2.ureg("Gg CO2 / year")
+        )
+        .all()
+        .item()
+    )
+    # all other gases should be nan
+    all_gases_but_N2O = list(result.indexes["source (gas)"])
+    all_gases_but_N2O.remove("N2O")
+    assert np.isnan(
+        result.pr.loc[{"category": mcat, "source (gas)": all_gases_but_N2O}].values
+    ).all()
+    # rule 7 -> 5
+    assert (result.pr.loc[{"category": "5"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 2.F.6 -> 2.E + 2.F.6 + 2.G.1 + 2.G.2 + 2.G.4,
+    # rule 2.F.6 + 3.D -> 2.E + 2.F.6 + 2.G - ignored because 2.F.G already converted
+    # rule 2.G -> 2.H.3 - 1-to-1-conversion
+    mcat = "M_2.E_2.F.6_2.G.1_2.G.2_2.G.4"
+    assert (result.pr.loc[{"category": mcat}] == 5.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    assert "M_2.E_2.F.6_2.G" not in list(result.indexes["category (IPCC2006)"])
+    assert (
+        (result.pr.loc[{"category": "2.H.3"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    )
 
 
 # test with new conversion and two existing categorisations
+@pytest.mark.xfail
 def test_convert_BURDI(empty_ds: xr.Dataset):
     # make a sample conversion object in climate categories
     filepath = get_test_data_filepath("BURDI_conversion.csv")
@@ -150,12 +181,24 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
     assert (
         (result.pr.loc[{"category": "3.C.7"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
-    # 2.E + 2.B = 2.E, 2.E should not be part of new data set
+    # rule 2.E + 2.B -> 2.B
+    # 2.B is part of PRIMAP categories, but cannot be retrieved from conversion
     assert np.isnan(result.pr.loc[{"category": "2.E"}].values).all()
     # cat 14638 in BURDI equals cat M.BIO in IPCC2006_PRIMAP
     assert (
         (result.pr.loc[{"category": "M.BIO"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
+    # 4.D -> M.3.C.45.AG
+    # This category is only available on M3C45AG branch in climate categories
+    # test locally with:
+    # `source venv/bin/activate`
+    # `pip install -e ../climate_categories`
+    # Will pass after climate categories release
+    assert (
+        (result.pr.loc[{"category": "3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
+        .all()
+        .item()
+    )
 
 
 # test with new conversion and new categorisations
@@ -198,5 +241,5 @@ def test_custom_conversion_and_two_custom_categorisations(empty_ds):
     assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
 
     # check result has 2 categories (input categorisation had 3)
-    # TODO this is ambiguous when order changes
+    # TODO this is ambiguous, order may change
     assert result.shape == (2, 21, 4, 1)