From a21f853be5b58f96eb9ed8da1def7fd9c7177a42 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Fri, 22 Nov 2024 09:38:58 +0100
Subject: [PATCH 1/2] Cleaner names for multifunctional products

---
 bw_simapro_csv/brightway.py        |  12 +++-
 pyproject.toml                     |   2 +-
 tests/unit/test_brightway_utils.py | 105 +++++++++++++++++++++++++++++
 3 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 tests/unit/test_brightway_utils.py

diff --git a/bw_simapro_csv/brightway.py b/bw_simapro_csv/brightway.py
index 2d22bb8..390a756 100644
--- a/bw_simapro_csv/brightway.py
+++ b/bw_simapro_csv/brightway.py
@@ -67,6 +67,14 @@ def allocation_as_manual_property(exc: dict) -> dict:
 
 def name_for_process(process: Process, missing_string: str) -> str:
     """Try several ways to generate a sensible name."""
+
+    def clean_name(name: str) -> str:
+        """Cleanup awkward name endings if needed."""
+        name = name.strip()
+        if name.endswith(","):
+            name = name[:-1]
+        return name
+
     if given_name := substitute_unspecified(process.parsed["metadata"].get("Process name")):
         return given_name
     if "Products" in process.blocks:
@@ -74,13 +82,13 @@ def name_for_process(process: Process, missing_string: str) -> str:
         if len(names) == 1:
             return names[0]
         else:
-            return "MFP: {}".format("⧺".join([name[:25] for name in names]))
+            return clean_name("MFP: {}".format("⧺".join([name[:25] for name in names])))
     if "Waste treatment" in process.blocks:
         names = [edge["name"] for edge in process.blocks["Waste treatment"].parsed]
         if len(names) == 1:
             return names[0]
         else:
-            return "MFP: {}".format("⧺".join([name[:25] for name in names]))
+            return clean_name("MFP: {}".format("⧺".join([name[:25] for name in names])))
     return missing_string
 
 
diff --git a/pyproject.toml b/pyproject.toml
index b06b283..991fa3e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,7 +85,7 @@ norecursedirs = [
     "build",
     ".tox"
 ]
-testpaths = ["tests/*.py", "tests/blocks/*.py", "tests/integration/*.py"]
+testpaths = ["tests/*.py", "tests/blocks/*.py", "tests/integration/*.py", "tests/unit/*.py"]
 
 [tool.flake8]
 # Some sane defaults for the code style checker flake8
diff --git a/tests/unit/test_brightway_utils.py b/tests/unit/test_brightway_utils.py
new file mode 100644
index 0000000..d6b66e9
--- /dev/null
+++ b/tests/unit/test_brightway_utils.py
@@ -0,0 +1,105 @@
+from bw_simapro_csv.blocks import Process
+from bw_simapro_csv.brightway import name_for_process
+
+
+def test_name_for_process_given_name():
+    class P(Process):
+        def __init__(self):
+            pass
+
+    p = P()
+    p.parsed = {"metadata": {"Process name": "foo"}}
+
+    assert name_for_process(p, "bar") == "foo"
+
+
+def test_name_for_process_given_name_unspecified():
+    class P(Process):
+        def __init__(self):
+            pass
+
+    p = P()
+    p.blocks = {}
+    p.parsed = {"metadata": {"Process name": "unspecified"}}
+    assert name_for_process(p, "bar") == "bar"
+
+    p.parsed = {"metadata": {"Process name": "UNSPECIFIED"}}
+    assert name_for_process(p, "bar") == "bar"
+
+
+def test_name_for_process_products():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [{"name": "foo"}, {"name": "bar"}]
+
+    p = P()
+    p.blocks = {"Products": o}
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: foo⧺bar"
+
+
+def test_name_for_process_products_clean_name():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [
+        {"name": r"Albacore, fillet, raw, at processing {FR} U"},
+        {"name": r"Albacore, residues, raw, at processing {FR} U"},
+    ]
+
+    p = P()
+    p.blocks = {"Products": o}  # exercise name cleanup on the "Products" branch
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: Albacore, fillet, raw, at⧺Albacore, residues, raw"
+
+
+def test_name_for_waste_treatment_products():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [{"name": "foo"}, {"name": "bar"}]
+
+    p = P()
+    p.blocks = {"Waste treatment": o}  # exercise the "Waste treatment" branch
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: foo⧺bar"
+
+
+def test_name_for_waste_treatment_products_clean_name():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [
+        {"name": r"Albacore, fillet, raw, at processing {FR} U"},
+        {"name": r"Albacore, residues, raw, at processing {FR} U"},
+    ]
+
+    p = P()
+    p.blocks = {"Waste treatment": o}
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: Albacore, fillet, raw, at⧺Albacore, residues, raw"

From ff350ca5cbb85b8934f3d1da07321b7df9765e11 Mon Sep 17 00:00:00 2001
From: Chris Mutel <cmutel@gmail.com>
Date: Mon, 25 Nov 2024 10:20:26 +0100
Subject: [PATCH 2/2] Add ability to separate products as separate nodes when
 exporting to Brightway

---
 CHANGES.md                                    |   5 +
 bw_simapro_csv/brightway.py                   |  63 ++++++-
 bw_simapro_csv/main.py                        |   6 +-
 .../test_brightway_export_allocation.py       |   2 +-
 .../test_brightway_product_creation.py        | 165 ++++++++++++++++++
 5 files changed, 230 insertions(+), 11 deletions(-)
 create mode 100644 tests/integration/test_brightway_product_creation.py

diff --git a/CHANGES.md b/CHANGES.md
index 5a93a21..5b5cdc5 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,10 @@
 # `bw_simapro_csv` Changelog
 
+### [DEV]
+
+* Add ability to separate products as separate nodes when exporting to Brightway
+* BREAKING CHANGE: `SimaProCSV.to_brightway()` now separates products into their own nodes by default; pass `separate_products=False` to keep the previous behaviour
+
 ### [0.2.6] - 2024-09-10
 
 * Packaging fix
diff --git a/bw_simapro_csv/brightway.py b/bw_simapro_csv/brightway.py
index 390a756..ada82e9 100644
--- a/bw_simapro_csv/brightway.py
+++ b/bw_simapro_csv/brightway.py
@@ -1,4 +1,5 @@
 import datetime
+import itertools
 from copy import deepcopy
 from typing import Union
 from uuid import uuid4
@@ -92,7 +93,39 @@ def clean_name(name: str) -> str:
     return missing_string
 
 
-def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> dict:
+def as_product_dct(edge: dict, node: dict) -> dict:
+    """Build a new `type: product` node dict from an `edge` and the process `node` it is on."""
+    NODE_ATTRS = ("name", "unit", "simapro_project", "location", "tags", "database", "comment")
+    EDGE_ATTRS = (
+        "name",
+        "unit",
+        "line_no",
+        "category",
+        "waste_type",
+        "comment",
+        "properties",
+        "simapro_category",
+    )
+    return (
+        {
+            "type": "product",
+            "code": uuid4().hex,  # fresh random code for every product node
+            "reference process": (node["database"], node["code"]),
+        }
+        | {key: node[key] for key in NODE_ATTRS if node.get(key)}  # falsy values are skipped
+        | {key: edge[key] for key in EDGE_ATTRS if edge.get(key)}  # edge wins on shared keys
+    )
+
+
+def reference_to_product(process_edge: dict, product: dict) -> dict:
+    """Point `process_edge["input"]` at the new product node; mutates and returns the same dict."""
+    process_edge["input"] = (product["database"], product["code"])
+    return process_edge
+
+
+def lci_to_brightway(
+    spcsv: SimaProCSV, missing_string: str = "(unknown)", separate_products: bool = False
+) -> dict:
     """Turn an extracted SimaPro CSV extract into metadata that can be imported into Brightway.
 
     Doesn't do any normalization or other data changes, just reorganizes the existing data."""
@@ -110,6 +143,7 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di
         # Note reversing of database and project terms here
         # In SimaPro, the project is lower priority than the database
         # but in Brightway it's the opposite.
+        "products": [],
         "project_parameters": [
             param
             for block in spcsv.blocks
@@ -214,14 +248,26 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di
                     process_dataset["exchanges"].append(edge | {"type": "biosphere"})
         if "Products" in process.blocks:
             for edge in process.blocks["Products"].parsed:
-                process_dataset["exchanges"].append(
-                    allocation_as_manual_property(edge | {"type": "production", "functional": True})
+                production_dct = allocation_as_manual_property(
+                    edge | {"type": "production", "functional": True}
                 )
+                if separate_products:
+                    product_dct = as_product_dct(production_dct, process_dataset)
+                    data["products"].append(product_dct)
+                    process_dataset["exchanges"].append(
+                        reference_to_product(production_dct, product_dct)
+                    )
+                else:
+                    process_dataset["exchanges"].append(production_dct)
         elif "Waste treatment" in process.blocks:
             for edge in process.blocks["Waste treatment"].parsed:
-                process_dataset["exchanges"].append(
-                    edge | {"type": "technosphere", "functional": True}
-                )
+                waste_edge = edge | {"type": "technosphere", "functional": True}
+                if separate_products:
+                    waste_dct = as_product_dct(waste_edge, process_dataset)
+                    data["products"].append(waste_dct)
+                    process_dataset["exchanges"].append(reference_to_product(waste_edge, waste_dct))
+                else:
+                    process_dataset["exchanges"].append(waste_edge)
                 if not any(e for e in process_dataset["exchanges"] if e["type"] == "production"):
                     dummy = deepcopy(edge)
                     dummy.update(
@@ -240,8 +286,9 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di
         sum(1 for exc in ds.get("exchanges") if exc.get("functional")) > 1
         for ds in data["processes"]
     ):
-        formatted = {(spcsv.database_name, ds["code"]): ds for ds in data["processes"]}
-        as_dict = allocation_before_writing(formatted, "manual_allocation")
+        as_dict = allocation_before_writing(
+            {(spcsv.database_name, ds["code"]): ds for ds in data["processes"]}, "manual_allocation"
+        )
         for (database, code), ds in as_dict.items():
             ds["code"] = code
             ds["database"] = database
diff --git a/bw_simapro_csv/main.py b/bw_simapro_csv/main.py
index 898e80f..fa5a47a 100644
--- a/bw_simapro_csv/main.py
+++ b/bw_simapro_csv/main.py
@@ -182,11 +182,13 @@ def __init__(
     def __iter__(self):
         return iter(self.blocks)
 
-    def to_brightway(self, filepath: Optional[Path] = None) -> Union[dict, Path]:
+    def to_brightway(
+        self, filepath: Optional[Path] = None, separate_products: bool = True
+    ) -> Union[dict, Path]:
         if self.header["kind"] == SimaProCSVType.processes:
             from .brightway import lci_to_brightway
 
-            data = lci_to_brightway(self)
+            data = lci_to_brightway(self, separate_products=separate_products)
             if filepath is not None:
                 with open(filepath, "w") as f:
                     json.dump(data, f, indent=2, ensure_ascii=False, default=json_serializer)
diff --git a/tests/integration/test_brightway_export_allocation.py b/tests/integration/test_brightway_export_allocation.py
index 2d07236..08b89a7 100644
--- a/tests/integration/test_brightway_export_allocation.py
+++ b/tests/integration/test_brightway_export_allocation.py
@@ -4,7 +4,7 @@
 
 
 def test_basic_header_extraction(fixtures_dir):
-    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway()
+    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway(separate_products=False)
     expected = {
         "created": "2016-10-12T22:54:47",
         "name": "Bobs_burgers",
diff --git a/tests/integration/test_brightway_product_creation.py b/tests/integration/test_brightway_product_creation.py
new file mode 100644
index 0000000..e0b97cc
--- /dev/null
+++ b/tests/integration/test_brightway_product_creation.py
@@ -0,0 +1,165 @@
+from bw_simapro_csv import SimaProCSV
+
+
+def test_basic_product_creation(fixtures_dir):
+    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway()
+
+    assert len(given["processes"]) == 4
+    assert len(given["products"]) == 3
+
+    residue_process = given["processes"][0]
+    expected = {
+        "code": "ReCenter000033915300046",
+        "name": "Agricultural residues, non-mechanized, sun dried, at farm, 1 kg dry matter (WFLDB 3.0)/GLO U",
+        "type": "process",
+    }
+    for key, value in expected.items():
+        assert residue_process[key] == value
+
+    mfp = given["processes"][1]
+    expected = {
+        "code": "ReCenter000033915302504",
+        "name": "MFP: Rice, at farm (WFLDB 3.0)⧺Rice straw, at farm (WFLD",
+        "type": "multifunctional",
+    }
+    for key, value in expected.items():
+        assert mfp[key] == value
+
+    rice_process = given["processes"][2]
+    expected = {
+        "mf_parent_key": ("Bobs_burgers", mfp["code"]),
+        "name": "Rice, at farm (WFLDB 3.0)/IN U (read-only process)",
+        "reference product": "Rice, at farm (WFLDB 3.0)/IN U",
+        "type": "readonly_process",
+        "unit": "kg",
+    }
+    for key, value in expected.items():
+        assert rice_process[key] == value
+
+    straw_process = given["processes"][3]
+    expected = {
+        "comment": "Stuff happened, and then we enjoyed life outside the office",
+        "mf_parent_key": ("Bobs_burgers", mfp["code"]),
+        "name": "Rice straw, at farm (WFLDB 3.0)/IN U (read-only process)",
+        "reference product": "Rice straw, at farm (WFLDB 3.0)/IN U",
+        "type": "readonly_process",
+    }
+    for key, value in expected.items():
+        assert straw_process[key] == value
+
+    common = (
+        "comment",
+        "data_entry",
+        "data_generator",
+        "database",
+        "location",
+        "mf_allocation_run_uuid",
+        "mf_strategy_label",
+        "publication_date",
+        "references",
+        "tags",
+        "simapro_project",
+    )
+    for label in common:
+        assert rice_process[label] == mfp[label]
+        assert straw_process[label] == mfp[label]
+
+    residue_product = given["products"][0]
+    expected = {
+        "comment": residue_process["comment"],
+        "database": residue_process["database"],
+        "line_no": 100,
+        "name": "Agricultural residues, non-mechanized, sun dried, at farm, 1 kg dry matter (WFLDB 3.0)/GLO U",
+        "properties": {"manual_allocation": 100.0},
+        "simapro_project": residue_process["simapro_project"],
+        "tags": residue_process["tags"],
+        "type": "product",
+        "unit": "kg",
+        "waste_type": "Compost",
+        "category": "_WFLDB 3.0\\_sub-datasets\\Animal production\\Feed",
+        "reference process": (residue_process["database"], residue_process["code"]),
+    }
+    for key, value in expected.items():
+        assert residue_product[key] == value
+
+    expected = {
+        "allocation": 100,
+        "amount": 1,
+        "functional": True,
+        "input": (residue_product["database"], residue_product["code"]),
+        "type": "production",
+        "unit": "kg",
+    }
+
+    exc = residue_process["exchanges"][-1]
+    for key, value in expected.items():
+        assert exc[key] == value
+
+    rice_product = given["products"][1]
+    expected = {
+        "comment": "INDIA",
+        "database": mfp["database"],
+        "line_no": 208,
+        "name": "Rice, at farm (WFLDB 3.0)/IN U",
+        "properties": {"manual_allocation": 95.8},
+        "simapro_project": mfp["simapro_project"],
+        "tags": mfp["tags"],
+        "type": "product",
+        "unit": "kg",
+        "waste_type": "not defined",
+        "category": "_WFLDB 3.0\\Plant products\\Arable\\Rice",
+        "reference process": (mfp["database"], mfp["code"]),
+    }
+    for key, value in expected.items():
+        assert rice_product[key] == value
+
+    expected = {
+        "allocation": 95.8,
+        "amount": 6250,
+        "functional": True,
+        "input": (rice_product["database"], rice_product["code"]),
+        "type": "production",
+        "unit": "kg",
+    }
+
+    exc = rice_process["exchanges"][-1]
+    for key, value in expected.items():
+        assert exc[key] == value
+
+    straw_product = given["products"][2]
+    expected = {
+        "comment": r"The amount of straw is calculated from the straw to grain-ratio of 1 and a straw harvest rate of 50%. Economic allocation is based on the assumption that grains account for 92% and straw for 8% of the price.",
+        "database": mfp["database"],
+        "line_no": 209,
+        "name": "Rice straw, at farm (WFLDB 3.0)/IN U",
+        "properties": {"manual_allocation": 4.2},
+        "simapro_project": mfp["simapro_project"],
+        "tags": mfp["tags"],
+        "type": "product",
+        "unit": "kg",
+        "waste_type": "not defined",
+        "category": "_WFLDB 3.0\\Plant products\\Arable\\Rice",
+        "reference process": (mfp["database"], mfp["code"]),
+    }
+    for key, value in expected.items():
+        assert straw_product[key] == value
+
+    expected = {
+        "allocation": 4.2,
+        "amount": 3125,
+        "functional": True,
+        "input": (straw_product["database"], straw_product["code"]),
+        "type": "production",
+        "unit": "kg",
+    }
+
+    exc = straw_process["exchanges"][-1]
+    for key, value in expected.items():
+        assert exc[key] == value
+
+
+def test_no_product_creation(fixtures_dir):
+    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway(separate_products=False)
+
+    assert len(given["processes"]) == 4
+    assert len(given["products"]) == 0