From 5b9eb884594c8bd3c380688f4f5f50c5e7197c0a Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 29 Oct 2024 14:26:12 +0100
Subject: [PATCH 01/13] create new M.-categories, still fails for aux dims

---
 primap2/_convert.py           | 85 +++++++++++++++++++----------------
 primap2/tests/test_convert.py |  3 ++
 2 files changed, 50 insertions(+), 38 deletions(-)

diff --git a/primap2/_convert.py b/primap2/_convert.py
index bb56e37..0f92b0c 100644
--- a/primap2/_convert.py
+++ b/primap2/_convert.py
@@ -175,31 +175,39 @@ def _fill_category(
                 )
                 continue
 
-            try:
-                effective_input_weights = derive_weights(
-                    dim=dim,
-                    category=category,
-                    rule=rule,
-                    operation_type="input",
-                )
-                effective_output_weights = derive_weights(
-                    dim=new_dim,
-                    category=category,
-                    rule=rule,
-                    operation_type="output",
-                )
-            except WeightingInfoMissing as err:
-                logger.warning(str(err))
-                continue
+            # try:
+            #     effective_input_weights = derive_weights(
+            #         dim=dim,
+            #         category=category,
+            #         rule=rule,
+            #         operation_type="input",
+            #     )
+            #     effective_output_weights = derive_weights(
+            #         dim=new_dim,
+            #         category=category,
+            #         rule=rule,
+            #         operation_type="output",
+            #     )
+            # except WeightingInfoMissing as err:
+            #     logger.warning(str(err))
+            #     continue
 
             # the left-hand side of the conversion formula summed up
-            lhs = (input_factors * effective_input_weights * self._da.loc[input_selection]).sum(
-                dim=dim
-            )
+            lhs = (input_factors * 1.0 * self._da.loc[input_selection]).sum(dim=dim)
             # the right-hand side of the conversion formula split up
-            rhs = lhs / output_factors / effective_output_weights
-
-            da.loc[output_selection] = rhs
+            rhs = lhs / output_factors / 1.0
+
+            # somewhere here we need to extend the categories with new M-categories
+            # if there is more than one category on the target side
+            if len(output_selection[new_dim]) > 1:
+                # this leads to very long category names
+                new_category = "M." + "_".join(output_selection[new_dim])
+                new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
+                da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
+                # TODO fails for aux dimensions, e.g. apply only for NO2
+                da.loc[{new_dim: new_category}] = rhs.sum(dim=new_dim)
+            else:
+                da.loc[output_selection] = rhs
 
             if not rule.is_restricted:
                 # stop processing rules for this category
@@ -437,22 +445,23 @@ def derive_weights(
         Object which can be multiplied with the input or output DataArray to apply
         weights.
     """
-    # TODO this may change again in the next PR
-    if operation_type == "input":
-        return 1.0
-    elif operation_type == "output":
-        if rule.cardinality_b == "one":
-            return 1.0
-        else:
-            raise NotImplementedError(
-                "Splitting input categories into multiple"
-                " output categories is currently not supported. "
-                f"{rule.csv_original_text=}, {category=}"
-            )
-    else:
-        raise NotImplementedError(
-            f"operation_type must be either input or output. Got {operation_type}"
-        )
+    return 1.0
+    # # TODO this may change again in the next PR
+    # if operation_type == "input":
+    #     return 1.0
+    # elif operation_type == "output":
+    #     if rule.cardinality_b == "one":
+    #         return 1.0
+    #     else:
+    #         raise NotImplementedError(
+    #             "Splitting input categories into multiple"
+    #             " output categories is currently not supported. "
+    #             f"{rule.csv_original_text=}, {category=}"
+    #         )
+    # else:
+    #     raise NotImplementedError(
+    #         f"operation_type must be either input or output. Got {operation_type}"
+    #     )
 
 
 def prepare_auxiliary_dimensions(
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 84e9516..e4e841f 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -67,6 +67,9 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
 
     assert (result.pr.loc[{"category": "1"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # TODO that name is a bit crazy, naming up for discussion
+    mcat = "M.1.A.2.f_1.A.2.g_1.A.2.h_1.A.2.i_1.A.2.j_1.A.2.k_1.A.2.l_1.A.2.m"
+    assert (result.pr.loc[{"category": mcat}] == 8.0 * primap2.ureg("Gg CO2 / year")).all().item()
 
 
 # test with new conversion and two existing categorisations

From 79524e66b3e05e0fbf7289835352e51787251578 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Wed, 30 Oct 2024 10:58:19 +0100
Subject: [PATCH 02/13] test passing

---
 primap2/_convert.py           | 32 ++++++++------------------------
 primap2/tests/test_convert.py | 25 +++++++++++++++++++++----
 2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/primap2/_convert.py b/primap2/_convert.py
index 0f92b0c..5674cd7 100644
--- a/primap2/_convert.py
+++ b/primap2/_convert.py
@@ -175,43 +175,27 @@ def _fill_category(
                 )
                 continue
 
-            # try:
-            #     effective_input_weights = derive_weights(
-            #         dim=dim,
-            #         category=category,
-            #         rule=rule,
-            #         operation_type="input",
-            #     )
-            #     effective_output_weights = derive_weights(
-            #         dim=new_dim,
-            #         category=category,
-            #         rule=rule,
-            #         operation_type="output",
-            #     )
-            # except WeightingInfoMissing as err:
-            #     logger.warning(str(err))
-            #     continue
-
             # the left-hand side of the conversion formula summed up
             lhs = (input_factors * 1.0 * self._da.loc[input_selection]).sum(dim=dim)
             # the right-hand side of the conversion formula split up
             rhs = lhs / output_factors / 1.0
 
-            # somewhere here we need to extend the categories with new M-categories
             # if there is more than one category on the target side
             if len(output_selection[new_dim]) > 1:
                 # this leads to very long category names
-                new_category = "M." + "_".join(output_selection[new_dim])
+                new_category = "M_" + "_".join(output_selection[new_dim])
                 new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
                 da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
-                # TODO fails for aux dimensions, e.g. apply only for NO2
-                da.loc[{new_dim: new_category}] = rhs.sum(dim=new_dim)
+                new_output_selection = output_selection.copy()
+                new_output_selection[new_dim] = new_category
+                da.loc[new_output_selection] = rhs.sum(dim=new_dim)
+                return output_selection[new_dim], da
             else:
                 da.loc[output_selection] = rhs
 
-            if not rule.is_restricted:
-                # stop processing rules for this category
-                return output_selection[new_dim], da
+                if not rule.is_restricted:
+                    # stop processing rules for this category
+                    return output_selection[new_dim], da
 
         logger.debug(
             f"No unrestricted rule to derive data for {category!r} applied, some or "
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index e4e841f..161990c 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -40,7 +40,6 @@ def test_conversion_source_does_not_match_dataset_dimension(empty_ds):
         )
 
 
-@pytest.mark.xfail
 def test_convert_ipcc(empty_ds: xr.Dataset):
     # build a DA categorized by IPCC1996 and with 1 everywhere so results are easy
     # to see
@@ -64,12 +63,30 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
         conversion=conversion,
         auxiliary_dimensions={"gas": "source (gas)"},
     )
-
+    # rule 1 -> 1
     assert (result.pr.loc[{"category": "1"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 2 + 3 -> 2
     assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
-    # TODO that name is a bit crazy, naming up for discussion
-    mcat = "M.1.A.2.f_1.A.2.g_1.A.2.h_1.A.2.i_1.A.2.j_1.A.2.k_1.A.2.l_1.A.2.m"
+    # rule 1.A.2.f -> 1.A.2.f + 1.A.2.g + 1.A.2.h + 1.A.2.i + 1.A.2.j + 1.A.2.k + 1.A.2.l + 1.A.2.m
+    mcat = "M_1.A.2.f_1.A.2.g_1.A.2.h_1.A.2.i_1.A.2.j_1.A.2.k_1.A.2.l_1.A.2.m"
     assert (result.pr.loc[{"category": mcat}] == 8.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 4.D for N2O only -> 3.C.4 + 3.C.5
+    mcat = "M_3.C.4_3.C.5"
+    assert (
+        (
+            result.pr.loc[{"category": mcat, "source (gas)": "N2O"}]
+            == 2.0 * primap2.ureg("Gg CO2 / year")
+        )
+        .all()
+        .item()
+    )
+    all_gases_but_N2O = list(result.indexes["source (gas)"])
+    all_gases_but_N2O.remove("N2O")
+    assert np.isnan(
+        result.pr.loc[{"category": mcat, "source (gas)": all_gases_but_N2O}].values
+    ).all()
+    # rule 7 -> 5
+    assert (result.pr.loc[{"category": "5"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
 
 
 # test with new conversion and two existing categorisations

From c9d83d3b3d5d85e3134397d68250b42ab94985eb Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Thu, 31 Oct 2024 09:22:41 +0100
Subject: [PATCH 03/13] ipcc conversion test

---
 primap2/_convert.py           | 72 ++++-------------------------------
 primap2/tests/test_convert.py | 10 +++++
 2 files changed, 17 insertions(+), 65 deletions(-)

diff --git a/primap2/_convert.py b/primap2/_convert.py
index 5674cd7..ffa7a3e 100644
--- a/primap2/_convert.py
+++ b/primap2/_convert.py
@@ -166,6 +166,9 @@ def _fill_category(
             already_converted = set(output_selection[new_dim]).intersection(
                 set(already_converted_categories)
             )
+            # if there are several categories on the target side
+            # we can still convert because it will
+            # create a new category
             if already_converted:
                 logger.warning(
                     f"For category {category!r}, would want to use a "
@@ -176,14 +179,15 @@ def _fill_category(
                 continue
 
             # the left-hand side of the conversion formula summed up
-            lhs = (input_factors * 1.0 * self._da.loc[input_selection]).sum(dim=dim)
+            lhs = (input_factors * self._da.loc[input_selection]).sum(dim=dim)
             # the right-hand side of the conversion formula split up
-            rhs = lhs / output_factors / 1.0
+            rhs = lhs / output_factors
 
             # if there is more than one category on the target side
             if len(output_selection[new_dim]) > 1:
-                # this leads to very long category names
+                # TODO this leads to very long category names
                 new_category = "M_" + "_".join(output_selection[new_dim])
+                # add newly created category to da
                 new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
                 da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
                 new_output_selection = output_selection.copy()
@@ -386,68 +390,6 @@ def factors_categories_to_xarray(
     return selection, factors
 
 
-class WeightingInfoMissing(ValueError):
-    """Some information to derive weighting factors for a rule is missing."""
-
-    def __init__(
-        self,
-        category: climate_categories.Category,
-        rule: climate_categories.ConversionRule,
-        message: str,
-    ):
-        full_message = (
-            f"Can not derive data for category {category!r} using rule"
-            f" '{rule}': {message} Skipping this rule."
-        )
-        ValueError.__init__(self, full_message)
-
-
-def derive_weights(
-    *,
-    dim: str,
-    category: climate_categories.Category,
-    rule: climate_categories.ConversionRule,
-    operation_type: str,
-) -> xr.DataArray | float:
-    """Derive the weights to use for applying a specific rule.
-
-    Parameters
-    ----------
-    dim: str
-        Dimension which contains the categories.
-    category: climate_categories.Category
-        Category which should be derived.
-    rule: climate_categories.ConversionRule
-        Rule that should be used to derive the category.
-    operation_type: ``input`` or ``output``
-        If weights for the source data (input) or the result data (output) should
-        be derived.
-
-    Returns
-    -------
-    factors: float or xr.DataArray
-        Object which can be multiplied with the input or output DataArray to apply
-        weights.
-    """
-    return 1.0
-    # # TODO this may change again in the next PR
-    # if operation_type == "input":
-    #     return 1.0
-    # elif operation_type == "output":
-    #     if rule.cardinality_b == "one":
-    #         return 1.0
-    #     else:
-    #         raise NotImplementedError(
-    #             "Splitting input categories into multiple"
-    #             " output categories is currently not supported. "
-    #             f"{rule.csv_original_text=}, {category=}"
-    #         )
-    # else:
-    #     raise NotImplementedError(
-    #         f"operation_type must be either input or output. Got {operation_type}"
-    #     )
-
-
 def prepare_auxiliary_dimensions(
     conversion: climate_categories.Conversion,
     auxiliary_dimensions: dict[str, str] | None,
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 161990c..dcdf40d 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -80,6 +80,7 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
         .all()
         .item()
     )
+    # all other gases should be nan
     all_gases_but_N2O = list(result.indexes["source (gas)"])
     all_gases_but_N2O.remove("N2O")
     assert np.isnan(
@@ -87,6 +88,15 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
     ).all()
     # rule 7 -> 5
     assert (result.pr.loc[{"category": "5"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    # rule 2.F.6 -> 2.E + 2.F.6 + 2.G.1 + 2.G.2 + 2.G.4,
+    # rule 2.F.6 + 3.D -> 2.E + 2.F.6 + 2.G - ignored because 2.F.G already converted
+    # rule 2.G -> 2.H.3 - 1-to-1-conversion
+    mcat = "M_2.E_2.F.6_2.G.1_2.G.2_2.G.4"
+    assert (result.pr.loc[{"category": mcat}] == 5.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    assert "M_2.E_2.F.6_2.G" not in list(result.indexes["category (IPCC2006)"])
+    assert (
+        (result.pr.loc[{"category": "2.H.3"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    )
 
 
 # test with new conversion and two existing categorisations

From ecbffb57784f91190fac5865ede68745b1b3d5ca Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Thu, 31 Oct 2024 12:17:04 +0100
Subject: [PATCH 04/13] burdi test (failing)

---
 primap2/tests/data/BURDI_conversion.csv |  3 +++
 primap2/tests/test_convert.py           | 11 ++++++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/primap2/tests/data/BURDI_conversion.csv b/primap2/tests/data/BURDI_conversion.csv
index 028247c..d315597 100644
--- a/primap2/tests/data/BURDI_conversion.csv
+++ b/primap2/tests/data/BURDI_conversion.csv
@@ -17,16 +17,19 @@ BURDI,IPCC2006_PRIMAP,comment
 2.C,2.C
 2.F,2.F
 2.G + 2.D, 2.H
+2.G, 2.H.3
 3,2.D
 4,M.AG
 4.A,3.A.1
 4.B,3.A.2
 4.C,3.C.7
+4.D, M.3.C.45.AG
 4.D + 4.C + 4.E + 4.F + 4.G,3.C
 4.E,3.C.1.c
 4.F,3.C.1.b
 4.G,3.C.8
 5,M.LULUCF
+4+5,3
 6,4
 6.A,4.A
 6.B,4.D
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index dcdf40d..850d31f 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -100,6 +100,7 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
 
 
 # test with new conversion and two existing categorisations
+@pytest.mark.xfail
 def test_convert_BURDI(empty_ds: xr.Dataset):
     # make a sample conversion object in climate categories
     filepath = get_test_data_filepath("BURDI_conversion.csv")
@@ -180,12 +181,20 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
     assert (
         (result.pr.loc[{"category": "3.C.7"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
-    # 2.E + 2.B = 2.E, 2.E should not be part of new data set
+    # rule 2.E + 2.B -> 2.B
+    # 2.B is part of PRIMAP categories, but cannot be retrieved from conversion
     assert np.isnan(result.pr.loc[{"category": "2.E"}].values).all()
     # cat 14638 in BURDI equals cat M.BIO in IPCC2006_PRIMAP
     assert (
         (result.pr.loc[{"category": "M.BIO"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
+    # map an old category to an unknown new category
+    # 4.D -> M.3.C.45.AG
+    assert (
+        (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
+        .all()
+        .item()
+    )
 
 
 # test with new conversion and new categorisations

From 5307d5de86aac472b36fa985c5a3a3f246e8146c Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Thu, 31 Oct 2024 15:13:46 +0100
Subject: [PATCH 05/13] test

---
 primap2/tests/data/BURDI_conversion.csv |  2 +-
 primap2/tests/test_convert.py           | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/primap2/tests/data/BURDI_conversion.csv b/primap2/tests/data/BURDI_conversion.csv
index d315597..551979a 100644
--- a/primap2/tests/data/BURDI_conversion.csv
+++ b/primap2/tests/data/BURDI_conversion.csv
@@ -23,7 +23,7 @@ BURDI,IPCC2006_PRIMAP,comment
 4.A,3.A.1
 4.B,3.A.2
 4.C,3.C.7
-4.D, M.3.C.45.AG
+4.D, 3.C.45.AG
 4.D + 4.C + 4.E + 4.F + 4.G,3.C
 4.E,3.C.1.c
 4.F,3.C.1.b
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 850d31f..140f4a8 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -188,10 +188,14 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
     assert (
         (result.pr.loc[{"category": "M.BIO"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
-    # map an old category to an unknown new category
     # 4.D -> M.3.C.45.AG
+    # This category is only available on M3C45AG branch in climate categories
+    # test locally with:
+    # `source venv/bin/activate`
+    # `pip install -e ../climate_categories`
+    # Will pass after climate categories release
     assert (
-        (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
+        (result.pr.loc[{"category": "3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
         .all()
         .item()
     )
@@ -237,5 +241,5 @@ def test_custom_conversion_and_two_custom_categorisations(empty_ds):
     assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
 
     # check result has 2 categories (input categorisation had 3)
-    # TODO this is ambiguous when order changes
+    # TODO this is ambiguous, order may change
     assert result.shape == (2, 21, 4, 1)

From c8988a07c5610003dceabe1ff25256b3460bb848 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 5 Nov 2024 10:25:22 +0100
Subject: [PATCH 06/13] M.3.C.45.AG

---
 primap2/tests/data/BURDI_conversion.csv | 2 +-
 primap2/tests/test_convert.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/primap2/tests/data/BURDI_conversion.csv b/primap2/tests/data/BURDI_conversion.csv
index 551979a..d315597 100644
--- a/primap2/tests/data/BURDI_conversion.csv
+++ b/primap2/tests/data/BURDI_conversion.csv
@@ -23,7 +23,7 @@ BURDI,IPCC2006_PRIMAP,comment
 4.A,3.A.1
 4.B,3.A.2
 4.C,3.C.7
-4.D, 3.C.45.AG
+4.D, M.3.C.45.AG
 4.D + 4.C + 4.E + 4.F + 4.G,3.C
 4.E,3.C.1.c
 4.F,3.C.1.b
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 140f4a8..4b37e16 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -195,7 +195,7 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
     # `pip install -e ../climate_categories`
     # Will pass after climate categories release
     assert (
-        (result.pr.loc[{"category": "3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
+        (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
         .all()
         .item()
     )

From 8369d68eb98de1830a444b13a7caf25b9d4e84ea Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 5 Nov 2024 14:48:52 +0100
Subject: [PATCH 07/13] create category name for multiple target categories

---
 primap2/_convert.py                           | 12 +++-
 .../tests/data/simple_categorisation_b.yaml   |  9 +++
 .../test_create_category_name_conversion.csv  |  6 ++
 primap2/tests/test_convert.py                 | 66 ++++++++++++++-----
 4 files changed, 76 insertions(+), 17 deletions(-)
 create mode 100644 primap2/tests/data/test_create_category_name_conversion.csv

diff --git a/primap2/_convert.py b/primap2/_convert.py
index ffa7a3e..5e4226a 100644
--- a/primap2/_convert.py
+++ b/primap2/_convert.py
@@ -186,7 +186,8 @@ def _fill_category(
             # if there is more than one category on the target side
             if len(output_selection[new_dim]) > 1:
                 # TODO this leads to very long category names
-                new_category = "M_" + "_".join(output_selection[new_dim])
+                new_category = create_category_name(rule)
+                # new_category = "A_(" + "_".join(output_selection[new_dim]) + ")"
                 # add newly created category to da
                 new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
                 da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
@@ -432,3 +433,12 @@ def prepare_auxiliary_dimensions(
     return {
         climate_categories.cats[name]: auxiliary_dimensions[name] for name in auxiliary_dimensions
     }
+
+
+def create_category_name(rule):
+    factor_to_string = {1: "+", -1: "-"}
+    components = [factor_to_string[i[1]] + i[0].codes[0] for i in rule.factors_categories_b.items()]
+    # remove the first "+" sign in the name (leave a "-" sign in)
+    if components[0][0] == "+":
+        components[0] = components[0][1:]
+    return "A_(" + "".join(components) + ")"
diff --git a/primap2/tests/data/simple_categorisation_b.yaml b/primap2/tests/data/simple_categorisation_b.yaml
index 05e1dc0..a82ed52 100644
--- a/primap2/tests/data/simple_categorisation_b.yaml
+++ b/primap2/tests/data/simple_categorisation_b.yaml
@@ -25,3 +25,12 @@ categories:
     alternative_codes:
       - B
       - CatB
+  3:
+    title: Category 3
+    comment: The third category
+  4:
+    title: Category 4
+    comment: The fourth category
+  5:
+    title: Category 5
+    comment: The fifth category
diff --git a/primap2/tests/data/test_create_category_name_conversion.csv b/primap2/tests/data/test_create_category_name_conversion.csv
new file mode 100644
index 0000000..53aa68e
--- /dev/null
+++ b/primap2/tests/data/test_create_category_name_conversion.csv
@@ -0,0 +1,6 @@
+# references: test
+# last_update: 2024-10-14
+A,B,comment
+1,1+2, no comment
+2,-3+4
+3,5-1
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 4b37e16..1f3cd1e 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -68,13 +68,15 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
     # rule 2 + 3 -> 2
     assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
     # rule 1.A.2.f -> 1.A.2.f + 1.A.2.g + 1.A.2.h + 1.A.2.i + 1.A.2.j + 1.A.2.k + 1.A.2.l + 1.A.2.m
-    mcat = "M_1.A.2.f_1.A.2.g_1.A.2.h_1.A.2.i_1.A.2.j_1.A.2.k_1.A.2.l_1.A.2.m"
-    assert (result.pr.loc[{"category": mcat}] == 8.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    autocat = "A_(1.A.2.f+1.A.2.g+1.A.2.h+1.A.2.i+1.A.2.j+1.A.2.k+1.A.2.l+1.A.2.m)"
+    assert (
+        (result.pr.loc[{"category": autocat}] == 8.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    )
     # rule 4.D for N2O only -> 3.C.4 + 3.C.5
-    mcat = "M_3.C.4_3.C.5"
+    autocat = "A_(3.C.4+3.C.5)"
     assert (
         (
-            result.pr.loc[{"category": mcat, "source (gas)": "N2O"}]
+            result.pr.loc[{"category": autocat, "source (gas)": "N2O"}]
             == 2.0 * primap2.ureg("Gg CO2 / year")
         )
         .all()
@@ -84,23 +86,25 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
     all_gases_but_N2O = list(result.indexes["source (gas)"])
     all_gases_but_N2O.remove("N2O")
     assert np.isnan(
-        result.pr.loc[{"category": mcat, "source (gas)": all_gases_but_N2O}].values
+        result.pr.loc[{"category": autocat, "source (gas)": all_gases_but_N2O}].values
     ).all()
     # rule 7 -> 5
     assert (result.pr.loc[{"category": "5"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     # rule 2.F.6 -> 2.E + 2.F.6 + 2.G.1 + 2.G.2 + 2.G.4,
     # rule 2.F.6 + 3.D -> 2.E + 2.F.6 + 2.G - ignored because 2.F.G already converted
     # rule 2.G -> 2.H.3 - 1-to-1-conversion
-    mcat = "M_2.E_2.F.6_2.G.1_2.G.2_2.G.4"
-    assert (result.pr.loc[{"category": mcat}] == 5.0 * primap2.ureg("Gg CO2 / year")).all().item()
-    assert "M_2.E_2.F.6_2.G" not in list(result.indexes["category (IPCC2006)"])
+    autocat = "A_(2.E+2.F.6+2.G.1+2.G.2+2.G.4)"
+    assert (
+        (result.pr.loc[{"category": autocat}] == 5.0 * primap2.ureg("Gg CO2 / year")).all().item()
+    )
+    assert "A_(2.E+2.F.6+2.G)" not in list(result.indexes["category (IPCC2006)"])
     assert (
         (result.pr.loc[{"category": "2.H.3"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
 
 
 # test with new conversion and two existing categorisations
-@pytest.mark.xfail
+# @pytest.mark.xfail
 def test_convert_BURDI(empty_ds: xr.Dataset):
     # make a sample conversion object in climate categories
     filepath = get_test_data_filepath("BURDI_conversion.csv")
@@ -189,16 +193,16 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
         (result.pr.loc[{"category": "M.BIO"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
     # 4.D -> M.3.C.45.AG
-    # This category is only available on M3C45AG branch in climate categories
+    # TODO This category is only available on M3C45AG branch in climate categories
     # test locally with:
     # `source venv/bin/activate`
     # `pip install -e ../climate_categories`
     # Will pass after climate categories release
-    assert (
-        (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
-        .all()
-        .item()
-    )
+    # assert (
+    #     (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
+    #     .all()
+    #     .item()
+    # )
 
 
 # test with new conversion and new categorisations
@@ -242,4 +246,34 @@ def test_custom_conversion_and_two_custom_categorisations(empty_ds):
 
     # check result has 2 categories (input categorisation had 3)
     # TODO this is ambiguous, order may change
-    assert result.shape == (2, 21, 4, 1)
+    assert result.shape == (5, 21, 4, 1)
+
+
+def test_create_category_name():
+    # make categorisation A from yaml
+    categorisation_a = cc.from_yaml(get_test_data_filepath("simple_categorisation_a.yaml"))
+
+    # make categorisation B from yaml
+    categorisation_b = cc.from_yaml(get_test_data_filepath("simple_categorisation_b.yaml"))
+
+    # categories not part of climate categories so we need to add them manually
+    cats = {
+        "A": categorisation_a,
+        "B": categorisation_b,
+    }
+
+    # make conversion from csv
+    conv = cc.Conversion.from_csv(
+        get_test_data_filepath("test_create_category_name_conversion.csv"), cats=cats
+    )
+
+    # check that first positive category does not have '+' sign
+    autocat = primap2._convert.create_category_name(conv.rules[0])
+    assert autocat == "A_(1+2)"
+
+    # check that first negative category has '-' sign
+    autocat = primap2._convert.create_category_name(conv.rules[1])
+    assert autocat == "A_(-3+4)"
+
+    autocat = primap2._convert.create_category_name(conv.rules[2])
+    assert autocat == "A_(5-1)"

From bee8a2626c18e571de8c6237bcb153500e898c13 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 5 Nov 2024 14:57:42 +0100
Subject: [PATCH 08/13] xfail test with missing category until cc release

---
 primap2/tests/test_convert.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 1f3cd1e..9845e81 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -104,7 +104,7 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
 
 
 # test with new conversion and two existing categorisations
-# @pytest.mark.xfail
+@pytest.mark.xfail
 def test_convert_BURDI(empty_ds: xr.Dataset):
     # make a sample conversion object in climate categories
     filepath = get_test_data_filepath("BURDI_conversion.csv")
@@ -198,11 +198,11 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
     # `source venv/bin/activate`
     # `pip install -e ../climate_categories`
     # Will pass after climate categories release
-    # assert (
-    #     (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
-    #     .all()
-    #     .item()
-    # )
+    assert (
+        (result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
+        .all()
+        .item()
+    )
 
 
 # test with new conversion and new categorisations

From a2e457c8c859f524dc6138a0db0d52fa760f5e94 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 5 Nov 2024 15:28:35 +0100
Subject: [PATCH 09/13] xfail another test

---
 primap2/tests/test_convert.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 9845e81..1f9ad96 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -16,6 +16,7 @@ def get_test_data_filepath(fname: str):
     return importlib.resources.files("primap2.tests.data").joinpath(fname)
 
 
+@pytest.mark.xfail
 def test_conversion_source_does_not_match_dataset_dimension(empty_ds):
     # make a data set with IPCC1996 categories
     da = empty_ds["CO2"]

From dae7ab3fda789084c89d6d0a76e7ac53eb6d2276 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 5 Nov 2024 15:35:26 +0100
Subject: [PATCH 10/13] docstring

---
 primap2/_convert.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/primap2/_convert.py b/primap2/_convert.py
index 5e4226a..ffc19e5 100644
--- a/primap2/_convert.py
+++ b/primap2/_convert.py
@@ -185,10 +185,7 @@ def _fill_category(
 
             # if there is more than one category on the target side
             if len(output_selection[new_dim]) > 1:
-                # TODO this leads to very long category names
                 new_category = create_category_name(rule)
-                # new_category = "A_(" + "_".join(output_selection[new_dim]) + ")"
-                # add newly created category to da
                 new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
                 da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
                 new_output_selection = output_selection.copy()
@@ -435,10 +432,22 @@ def prepare_auxiliary_dimensions(
     }
 
 
-def create_category_name(rule):
+def create_category_name(rule: climate_categories.ConversionRule):
+    """
+    Create a category name based on the provided rule.
+
+    Parameters
+    ----------
+    rule : climate_categories.ConversionRule
+        rule to convert between categories from two different categorizations.
+
+    Returns
+    -------
+        The generated category name.
+    """
     factor_to_string = {1: "+", -1: "-"}
     components = [factor_to_string[i[1]] + i[0].codes[0] for i in rule.factors_categories_b.items()]
-    # remove the first "+" sign in the name (leave a "-" sign in)
+    # remove the first "+" sign in the name (leave "-" sign in)
     if components[0][0] == "+":
         components[0] = components[0][1:]
     return "A_(" + "".join(components) + ")"

From 898491b58d928dbcf196255e95b350c164f91481 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Tue, 5 Nov 2024 16:37:16 +0100
Subject: [PATCH 11/13] ruff

---
 primap2/_convert.py           | 2 +-
 primap2/tests/test_convert.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/primap2/_convert.py b/primap2/_convert.py
index ffc19e5..e26baa3 100644
--- a/primap2/_convert.py
+++ b/primap2/_convert.py
@@ -186,7 +186,7 @@ def _fill_category(
             # if there is more than one category on the target side
             if len(output_selection[new_dim]) > 1:
                 new_category = create_category_name(rule)
-                new_categories = list(da.indexes["category (IPCC2006)"]) + [new_category]
+                new_categories = [*da.indexes["category (IPCC2006)"], new_category]
                 da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
                 new_output_selection = output_selection.copy()
                 new_output_selection[new_dim] = new_category
diff --git a/primap2/tests/test_convert.py b/primap2/tests/test_convert.py
index 1f9ad96..be290d0 100644
--- a/primap2/tests/test_convert.py
+++ b/primap2/tests/test_convert.py
@@ -187,7 +187,7 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
         (result.pr.loc[{"category": "3.C.7"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
     )
     # rule 2.E + 2.B -> 2.B
-    # 2.B is part of PRIMAP categories, but cannot be retrieved from conversion
+    # 2.E is part of PRIMAP categories, but cannot be retrieved from conversion
     assert np.isnan(result.pr.loc[{"category": "2.E"}].values).all()
     # cat 14638 in BURDI equals cat M.BIO in IPCC2006_PRIMAP
     assert (

From 5e7c23740f25a629f29604d07fd5c1db2cd46162 Mon Sep 17 00:00:00 2001
From: Daniel Busch <daniel.busch@climate-resource.com>
Date: Mon, 11 Nov 2024 09:05:08 +0100
Subject: [PATCH 12/13] changelog

---
 changelog/291.improvement.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changelog/291.improvement.md

diff --git a/changelog/291.improvement.md b/changelog/291.improvement.md
new file mode 100644
index 0000000..34ba38c
--- /dev/null
+++ b/changelog/291.improvement.md
@@ -0,0 +1 @@
+In the conversion function, disable splitting into multiple categories, instead create an aggregated category.
\ No newline at end of file

From 3da272846b03bead3c6fbf9cc6057dbc53a3e382 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 11 Nov 2024 08:05:49 +0000
Subject: [PATCH 13/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 changelog/291.improvement.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/changelog/291.improvement.md b/changelog/291.improvement.md
index 34ba38c..2630c74 100644
--- a/changelog/291.improvement.md
+++ b/changelog/291.improvement.md
@@ -1 +1 @@
-In the conversion function, disable splitting into multiple categories, instead create an aggregated category.
\ No newline at end of file
+In the conversion function, disable splitting into multiple categories, instead create an aggregated category.