Skip to content

Commit

Permalink
Merge pull request #291 from primap-community/1-to-n-mapping
Browse files Browse the repository at this point in the history
  • Loading branch information
crdanielbusch authored Nov 11, 2024
2 parents 032e77c + 3da2728 commit fcdaf32
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 92 deletions.
1 change: 1 addition & 0 deletions changelog/291.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
In the conversion function, disable splitting into multiple categories; instead, create an aggregated category.
128 changes: 41 additions & 87 deletions primap2/_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,9 @@ def _fill_category(
already_converted = set(output_selection[new_dim]).intersection(
set(already_converted_categories)
)
# if there are several categories on the target side
# we can still convert because it will
# create a new category
if already_converted:
logger.warning(
f"For category {category!r}, would want to use a "
Expand All @@ -175,35 +178,26 @@ def _fill_category(
)
continue

try:
effective_input_weights = derive_weights(
dim=dim,
category=category,
rule=rule,
operation_type="input",
)
effective_output_weights = derive_weights(
dim=new_dim,
category=category,
rule=rule,
operation_type="output",
)
except WeightingInfoMissing as err:
logger.warning(str(err))
continue

# the left-hand side of the conversion formula summed up
lhs = (input_factors * effective_input_weights * self._da.loc[input_selection]).sum(
dim=dim
)
lhs = (input_factors * self._da.loc[input_selection]).sum(dim=dim)
# the right-hand side of the conversion formula split up
rhs = lhs / output_factors / effective_output_weights

da.loc[output_selection] = rhs

if not rule.is_restricted:
# stop processing rules for this category
rhs = lhs / output_factors

# if there is more than one category on the target side
if len(output_selection[new_dim]) > 1:
new_category = create_category_name(rule)
new_categories = [*da.indexes["category (IPCC2006)"], new_category]
da = da.reindex({"category (IPCC2006)": new_categories}, fill_value=np.nan)
new_output_selection = output_selection.copy()
new_output_selection[new_dim] = new_category
da.loc[new_output_selection] = rhs.sum(dim=new_dim)
return output_selection[new_dim], da
else:
da.loc[output_selection] = rhs

if not rule.is_restricted:
# stop processing rules for this category
return output_selection[new_dim], da

logger.debug(
f"No unrestricted rule to derive data for {category!r} applied, some or "
Expand Down Expand Up @@ -394,67 +388,6 @@ def factors_categories_to_xarray(
return selection, factors


class WeightingInfoMissing(ValueError):
    """Some information to derive weighting factors for a rule is missing."""

    def __init__(
        self,
        category: climate_categories.Category,
        rule: climate_categories.ConversionRule,
        message: str,
    ):
        """Build the error message from the category, the rule and a reason.

        Parameters
        ----------
        category : climate_categories.Category
            Category which could not be derived; included in the message via
            its ``repr``.
        rule : climate_categories.ConversionRule
            Rule which could not be applied; included in the message via its
            ``str`` representation.
        message : str
            Explanation of what is missing. It is inserted directly before the
            trailing " Skipping this rule." sentence.
        """
        full_message = (
            f"Can not derive data for category {category!r} using rule"
            f" '{rule}': {message} Skipping this rule."
        )
        # cooperative call instead of naming the base class explicitly
        super().__init__(full_message)


def derive_weights(
    *,
    dim: str,
    category: climate_categories.Category,
    rule: climate_categories.ConversionRule,
    operation_type: str,
) -> xr.DataArray | float:
    """Return the weights to apply when using a specific rule.

    Parameters
    ----------
    dim : str
        Dimension which contains the categories. Not read by the current
        implementation.
    category : climate_categories.Category
        Category which should be derived. Only used in the error message.
    rule : climate_categories.ConversionRule
        Rule that should be used to derive the category.
    operation_type : ``input`` or ``output``
        Whether weights for the source data (input) or for the result data
        (output) are requested.

    Returns
    -------
    factors : float or xr.DataArray
        Object which can be multiplied with the input or output DataArray to
        apply weights. The current implementation always returns ``1.0``.

    Raises
    ------
    NotImplementedError
        If ``operation_type`` is neither ``input`` nor ``output``, or if output
        weights are requested for a rule whose ``cardinality_b`` is not
        ``"one"``.
    """
    # TODO this may change again in the next PR
    if operation_type not in ("input", "output"):
        raise NotImplementedError(
            f"operation_type must be either input or output. Got {operation_type}"
        )

    # Input data is never weighted; output data only for a single target category.
    if operation_type == "input" or rule.cardinality_b == "one":
        return 1.0

    raise NotImplementedError(
        "Splitting input categories into multiple"
        " output categories is currently not supported. "
        f"{rule.csv_original_text=}, {category=}"
    )


def prepare_auxiliary_dimensions(
conversion: climate_categories.Conversion,
auxiliary_dimensions: dict[str, str] | None,
Expand Down Expand Up @@ -497,3 +430,24 @@ def prepare_auxiliary_dimensions(
return {
climate_categories.cats[name]: auxiliary_dimensions[name] for name in auxiliary_dimensions
}


def create_category_name(rule: climate_categories.ConversionRule) -> str:
    """
    Create a category name based on the provided rule.

    The name is built from the target side of the rule
    (``rule.factors_categories_b``): the first code of every category is
    prefixed with the sign of its factor, the pieces are concatenated in
    iteration order, and the result is wrapped as ``A_(...)``. A leading "+"
    is dropped, a leading "-" is kept, e.g. ``A_(1+2)`` or ``A_(-3+4)``.

    Parameters
    ----------
    rule : climate_categories.ConversionRule
        rule to convert between categories from two different categorizations.

    Returns
    -------
    str
        The generated category name.

    Raises
    ------
    ValueError
        If the target side of the rule contains no categories, so no name can
        be built.
    KeyError
        If a factor on the target side is neither ``1`` nor ``-1``.
    """
    factor_to_string = {1: "+", -1: "-"}
    components = [
        factor_to_string[factor] + category.codes[0]
        for category, factor in rule.factors_categories_b.items()
    ]
    if not components:
        raise ValueError(
            f"Cannot create a category name for rule '{rule}': "
            "the target side of the rule is empty."
        )
    # remove the first "+" sign in the name (leave "-" sign in)
    components[0] = components[0].removeprefix("+")
    return "A_(" + "".join(components) + ")"
3 changes: 3 additions & 0 deletions primap2/tests/data/BURDI_conversion.csv
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,19 @@ BURDI,IPCC2006_PRIMAP,comment
2.C,2.C
2.F,2.F
2.G + 2.D, 2.H
2.G, 2.H.3
3,2.D
4,M.AG
4.A,3.A.1
4.B,3.A.2
4.C,3.C.7
4.D, M.3.C.45.AG
4.D + 4.C + 4.E + 4.F + 4.G,3.C
4.E,3.C.1.c
4.F,3.C.1.b
4.G,3.C.8
5,M.LULUCF
4+5,3
6,4
6.A,4.A
6.B,4.D
Expand Down
9 changes: 9 additions & 0 deletions primap2/tests/data/simple_categorisation_b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,12 @@ categories:
alternative_codes:
- B
- CatB
3:
title: Category 3
comment: The third category
4:
title: Category 4
comment: The fourth category
5:
title: Category 5
comment: The fifth category
6 changes: 6 additions & 0 deletions primap2/tests/data/test_create_category_name_conversion.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# references: test
# last_update: 2024-10-14
A,B,comment
1,1+2, no comment
2,-3+4
3,5-1
88 changes: 83 additions & 5 deletions primap2/tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def get_test_data_filepath(fname: str):
return importlib.resources.files("primap2.tests.data").joinpath(fname)


@pytest.mark.xfail
def test_conversion_source_does_not_match_dataset_dimension(empty_ds):
# make a data set with IPCC1996 categories
da = empty_ds["CO2"]
Expand All @@ -40,7 +41,6 @@ def test_conversion_source_does_not_match_dataset_dimension(empty_ds):
)


@pytest.mark.xfail
def test_convert_ipcc(empty_ds: xr.Dataset):
# build a DA categorized by IPCC1996 and with 1 everywhere so results are easy
# to see
Expand All @@ -64,12 +64,48 @@ def test_convert_ipcc(empty_ds: xr.Dataset):
conversion=conversion,
auxiliary_dimensions={"gas": "source (gas)"},
)

# rule 1 -> 1
assert (result.pr.loc[{"category": "1"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
# rule 2 + 3 -> 2
assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()
# rule 1.A.2.f -> 1.A.2.f + 1.A.2.g + 1.A.2.h + 1.A.2.i + 1.A.2.j + 1.A.2.k + 1.A.2.l + 1.A.2.m
autocat = "A_(1.A.2.f+1.A.2.g+1.A.2.h+1.A.2.i+1.A.2.j+1.A.2.k+1.A.2.l+1.A.2.m)"
assert (
(result.pr.loc[{"category": autocat}] == 8.0 * primap2.ureg("Gg CO2 / year")).all().item()
)
# rule 4.D for N2O only -> 3.C.4 + 3.C.5
autocat = "A_(3.C.4+3.C.5)"
assert (
(
result.pr.loc[{"category": autocat, "source (gas)": "N2O"}]
== 2.0 * primap2.ureg("Gg CO2 / year")
)
.all()
.item()
)
# all other gases should be nan
all_gases_but_N2O = list(result.indexes["source (gas)"])
all_gases_but_N2O.remove("N2O")
assert np.isnan(
result.pr.loc[{"category": autocat, "source (gas)": all_gases_but_N2O}].values
).all()
# rule 7 -> 5
assert (result.pr.loc[{"category": "5"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
# rule 2.F.6 -> 2.E + 2.F.6 + 2.G.1 + 2.G.2 + 2.G.4,
# rule 2.F.6 + 3.D -> 2.E + 2.F.6 + 2.G - ignored because 2.F.6 already converted
# rule 2.G -> 2.H.3 - 1-to-1-conversion
autocat = "A_(2.E+2.F.6+2.G.1+2.G.2+2.G.4)"
assert (
(result.pr.loc[{"category": autocat}] == 5.0 * primap2.ureg("Gg CO2 / year")).all().item()
)
assert "A_(2.E+2.F.6+2.G)" not in list(result.indexes["category (IPCC2006)"])
assert (
(result.pr.loc[{"category": "2.H.3"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
)


# test with new conversion and two existing categorisations
@pytest.mark.xfail
def test_convert_BURDI(empty_ds: xr.Dataset):
# make a sample conversion object in climate categories
filepath = get_test_data_filepath("BURDI_conversion.csv")
Expand Down Expand Up @@ -150,12 +186,24 @@ def test_convert_BURDI(empty_ds: xr.Dataset):
assert (
(result.pr.loc[{"category": "3.C.7"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
)
# 2.E + 2.B = 2.E, 2.E should not be part of new data set
# rule 2.E + 2.B -> 2.B
# 2.E is part of PRIMAP categories, but cannot be retrieved from conversion
assert np.isnan(result.pr.loc[{"category": "2.E"}].values).all()
# cat 14638 in BURDI equals cat M.BIO in IPCC2006_PRIMAP
assert (
(result.pr.loc[{"category": "M.BIO"}] == 1.0 * primap2.ureg("Gg CO2 / year")).all().item()
)
# 4.D -> M.3.C.45.AG
# TODO This category is only available on M3C45AG branch in climate categories
# test locally with:
# `source venv/bin/activate`
# `pip install -e ../climate_categories`
# Will pass after climate categories release
assert (
(result.pr.loc[{"category": "M.3.C.45.AG"}] == 1.0 * primap2.ureg("Gg CO2 / year"))
.all()
.item()
)


# test with new conversion and new categorisations
Expand Down Expand Up @@ -198,5 +246,35 @@ def test_custom_conversion_and_two_custom_categorisations(empty_ds):
assert (result.pr.loc[{"category": "2"}] == 2.0 * primap2.ureg("Gg CO2 / year")).all().item()

# check result has 2 categories (input categorisation had 3)
# TODO this is ambiguous when order changes
assert result.shape == (2, 21, 4, 1)
# TODO this is ambiguous, order may change
assert result.shape == (5, 21, 4, 1)


def test_create_category_name():
    """Check the names generated for rules with several target categories."""
    # the categorisations are not part of climate categories, so they are
    # loaded from yaml and handed to the conversion manually
    cats = {
        "A": cc.from_yaml(get_test_data_filepath("simple_categorisation_a.yaml")),
        "B": cc.from_yaml(get_test_data_filepath("simple_categorisation_b.yaml")),
    }

    # make conversion from csv
    conv = cc.Conversion.from_csv(
        get_test_data_filepath("test_create_category_name_conversion.csv"), cats=cats
    )

    # expected name per rule, in the order of the rules in the csv file
    expected_names = [
        "A_(1+2)",  # first positive category does not have a '+' sign
        "A_(-3+4)",  # first negative category keeps its '-' sign
        "A_(5-1)",  # a '-' sign in the middle is kept
    ]
    for idx, expected in enumerate(expected_names):
        autocat = primap2._convert.create_category_name(conv.rules[idx])
        assert autocat == expected

0 comments on commit fcdaf32

Please sign in to comment.