Merge pull request #12 from brightway-lca/separate-products

Add ability to separate products as separate nodes when exporting to Brightway
brightway-lca · Nov 25, 2024 · 36b57f0
2 parents 7370e7b + ff350ca
commit 36b57f0
Show file tree

Hide file tree

Showing 7 changed files with 346 additions and 14 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,5 +1,10 @@
 # `bw_simapro_csv` Changelog
 
+### [DEV]
+
+* Add ability to separate products as separate nodes when exporting to Brightway
+* BREAKING CHANGE: `SimaProCSV.to_brightway()` now exports products as separate nodes by default; pass `separate_products=False` to keep the previous behavior
+
 ### [0.2.6] - 2024-09-10
 
 * Packaging fix

diff --git a/bw_simapro_csv/brightway.py b/bw_simapro_csv/brightway.py
@@ -1,4 +1,5 @@
 import datetime
+import itertools
 from copy import deepcopy
 from typing import Union
 from uuid import uuid4
@@ -67,24 +68,64 @@ def allocation_as_manual_property(exc: dict) -> dict:
 
 def name_for_process(process: Process, missing_string: str) -> str:
     """Try several ways to generate a sensible name."""
+
+    def clean_name(name: str) -> str:
+        """Cleanup awkward name endings if needed."""
+        name = name.strip()
+        if name.endswith(","):
+            name = name[:-1]
+        return name
+
     if given_name := substitute_unspecified(process.parsed["metadata"].get("Process name")):
         return given_name
     if "Products" in process.blocks:
         names = [edge["name"] for edge in process.blocks["Products"].parsed]
         if len(names) == 1:
             return names[0]
         else:
-            return "MFP: {}".format("⧺".join([name[:25] for name in names]))
+            return clean_name("MFP: {}".format("⧺".join([name[:25] for name in names])))
     if "Waste treatment" in process.blocks:
         names = [edge["name"] for edge in process.blocks["Waste treatment"].parsed]
         if len(names) == 1:
             return names[0]
         else:
-            return "MFP: {}".format("⧺".join([name[:25] for name in names]))
+            return clean_name("MFP: {}".format("⧺".join([name[:25] for name in names])))
     return missing_string
 
 
-def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> dict:
+def as_product_dct(edge: dict, node: dict) -> dict:
+    """Take an edge on a node and generate a new product node."""
+    NODE_ATTRS = ("name", "unit", "simapro_project", "location", "tags", "database", "comment")
+    EDGE_ATTRS = (
+        "name",
+        "unit",
+        "line_no",
+        "category",
+        "waste_type",
+        "comment",
+        "properties",
+        "simapro_category",
+    )
+    return (
+        {
+            "type": "product",
+            "code": uuid4().hex,
+            "reference process": (node["database"], node["code"]),
+        }
+        | {key: node[key] for key in NODE_ATTRS if node.get(key)}
+        | {key: edge[key] for key in EDGE_ATTRS if edge.get(key)}
+    )
+
+
+def reference_to_product(process_edge: dict, product: dict) -> dict:
+    """Add explicit link from process edge to new product node"""
+    process_edge["input"] = (product["database"], product["code"])
+    return process_edge
+
+
+def lci_to_brightway(
+    spcsv: SimaProCSV, missing_string: str = "(unknown)", separate_products: bool = False
+) -> dict:
     """Turn an extracted SimaPro CSV extract into metadata that can be imported into Brightway.
 
     Doesn't do any normalization or other data changes, just reorganizes the existing data."""
@@ -102,6 +143,7 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di
         # Note reversing of database and project terms here
         # In SimaPro, the project is lower priority than the database
         # but in Brightway it's the opposite.
+        "products": [],
         "project_parameters": [
             param
             for block in spcsv.blocks
@@ -206,14 +248,26 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di
                     process_dataset["exchanges"].append(edge | {"type": "biosphere"})
         if "Products" in process.blocks:
             for edge in process.blocks["Products"].parsed:
-                process_dataset["exchanges"].append(
-                    allocation_as_manual_property(edge | {"type": "production", "functional": True})
+                production_dct = allocation_as_manual_property(
+                    edge | {"type": "production", "functional": True}
                 )
+                if separate_products:
+                    product_dct = as_product_dct(production_dct, process_dataset)
+                    data["products"].append(product_dct)
+                    process_dataset["exchanges"].append(
+                        reference_to_product(production_dct, product_dct)
+                    )
+                else:
+                    process_dataset["exchanges"].append(production_dct)
         elif "Waste treatment" in process.blocks:
             for edge in process.blocks["Waste treatment"].parsed:
-                process_dataset["exchanges"].append(
-                    edge | {"type": "technosphere", "functional": True}
-                )
+                waste_edge = edge | {"type": "technosphere", "functional": True}
+                if separate_products:
+                    waste_dct = as_product_dct(waste_edge, process_dataset)
+                    data["products"].append(waste_dct)
+                    process_dataset["exchanges"].append(reference_to_product(waste_edge, waste_dct))
+                else:
+                    process_dataset["exchanges"].append(waste_edge)
                 if not any(e for e in process_dataset["exchanges"] if e["type"] == "production"):
                     dummy = deepcopy(edge)
                     dummy.update(
@@ -232,8 +286,9 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di
         sum(1 for exc in ds.get("exchanges") if exc.get("functional")) > 1
         for ds in data["processes"]
     ):
-        formatted = {(spcsv.database_name, ds["code"]): ds for ds in data["processes"]}
-        as_dict = allocation_before_writing(formatted, "manual_allocation")
+        as_dict = allocation_before_writing(
+            {(spcsv.database_name, ds["code"]): ds for ds in data["processes"]}, "manual_allocation"
+        )
         for (database, code), ds in as_dict.items():
             ds["code"] = code
             ds["database"] = database

diff --git a/bw_simapro_csv/main.py b/bw_simapro_csv/main.py
@@ -182,11 +182,13 @@ def __init__(
     def __iter__(self):
         return iter(self.blocks)
 
-    def to_brightway(self, filepath: Optional[Path] = None) -> Union[dict, Path]:
+    def to_brightway(
+        self, filepath: Optional[Path] = None, separate_products: bool = True
+    ) -> Union[dict, Path]:
         if self.header["kind"] == SimaProCSVType.processes:
             from .brightway import lci_to_brightway
 
-            data = lci_to_brightway(self)
+            data = lci_to_brightway(self, separate_products=separate_products)
             if filepath is not None:
                 with open(filepath, "w") as f:
                     json.dump(data, f, indent=2, ensure_ascii=False, default=json_serializer)

diff --git a/pyproject.toml b/pyproject.toml
@@ -85,7 +85,7 @@ norecursedirs = [
     "build",
     ".tox"
 ]
-testpaths = ["tests/*.py", "tests/blocks/*.py", "tests/integration/*.py"]
+testpaths = ["tests/*.py", "tests/blocks/*.py", "tests/integration/*.py", "tests/unit/*.py"]
 
 [tool.flake8]
 # Some sane defaults for the code style checker flake8

diff --git a/tests/integration/test_brightway_export_allocation.py b/tests/integration/test_brightway_export_allocation.py
@@ -4,7 +4,7 @@
 
 
 def test_basic_header_extraction(fixtures_dir):
-    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway()
+    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway(separate_products=False)
     expected = {
         "created": "2016-10-12T22:54:47",
         "name": "Bobs_burgers",

diff --git a/tests/integration/test_brightway_product_creation.py b/tests/integration/test_brightway_product_creation.py
@@ -0,0 +1,165 @@
+from bw_simapro_csv import SimaProCSV
+
+
+def test_basic_product_creation(fixtures_dir):
+    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway()
+
+    assert len(given["processes"]) == 4
+    assert len(given["products"]) == 3
+
+    residue_process = given["processes"][0]
+    expected = {
+        "code": "ReCenter000033915300046",
+        "name": "Agricultural residues, non-mechanized, sun dried, at farm, 1 kg dry matter (WFLDB 3.0)/GLO U",
+        "type": "process",
+    }
+    for key, value in expected.items():
+        assert residue_process[key] == value
+
+    mfp = given["processes"][1]
+    expected = {
+        "code": "ReCenter000033915302504",
+        "name": "MFP: Rice, at farm (WFLDB 3.0)⧺Rice straw, at farm (WFLD",
+        "type": "multifunctional",
+    }
+    for key, value in expected.items():
+        assert mfp[key] == value
+
+    rice_process = given["processes"][2]
+    expected = {
+        "mf_parent_key": ("Bobs_burgers", mfp["code"]),
+        "name": "Rice, at farm (WFLDB 3.0)/IN U (read-only process)",
+        "reference product": "Rice, at farm (WFLDB 3.0)/IN U",
+        "type": "readonly_process",
+        "unit": "kg",
+    }
+    for key, value in expected.items():
+        assert rice_process[key] == value
+
+    straw_process = given["processes"][3]
+    expected = {
+        "comment": "Stuff happened, and then we enjoyed life outside the office",
+        "mf_parent_key": ("Bobs_burgers", mfp["code"]),
+        "name": "Rice straw, at farm (WFLDB 3.0)/IN U (read-only process)",
+        "reference product": "Rice straw, at farm (WFLDB 3.0)/IN U",
+        "type": "readonly_process",
+    }
+    for key, value in expected.items():
+        assert straw_process[key] == value
+
+    common = (
+        "comment",
+        "data_entry",
+        "data_generator",
+        "database",
+        "location",
+        "mf_allocation_run_uuid",
+        "mf_strategy_label",
+        "publication_date",
+        "references",
+        "tags",
+        "simapro_project",
+    )
+    for label in common:
+        assert rice_process[label] == mfp[label]
+        assert straw_process[label] == mfp[label]
+
+    residue_product = given["products"][0]
+    expected = {
+        "comment": residue_process["comment"],
+        "database": residue_process["database"],
+        "line_no": 100,
+        "name": "Agricultural residues, non-mechanized, sun dried, at farm, 1 kg dry matter (WFLDB 3.0)/GLO U",
+        "properties": {"manual_allocation": 100.0},
+        "simapro_project": residue_process["simapro_project"],
+        "tags": residue_process["tags"],
+        "type": "product",
+        "unit": "kg",
+        "waste_type": "Compost",
+        "category": "_WFLDB 3.0\\_sub-datasets\\Animal production\\Feed",
+        "reference process": (residue_process["database"], residue_process["code"]),
+    }
+    for key, value in expected.items():
+        assert residue_product[key] == value
+
+    expected = {
+        "allocation": 100,
+        "amount": 1,
+        "functional": True,
+        "input": (residue_product["database"], residue_product["code"]),
+        "type": "production",
+        "unit": "kg",
+    }
+
+    exc = residue_process["exchanges"][-1]
+    for key, value in expected.items():
+        assert exc[key] == value
+
+    rice_product = given["products"][1]
+    expected = {
+        "comment": "INDIA",
+        "database": mfp["database"],
+        "line_no": 208,
+        "name": "Rice, at farm (WFLDB 3.0)/IN U",
+        "properties": {"manual_allocation": 95.8},
+        "simapro_project": mfp["simapro_project"],
+        "tags": mfp["tags"],
+        "type": "product",
+        "unit": "kg",
+        "waste_type": "not defined",
+        "category": "_WFLDB 3.0\\Plant products\\Arable\\Rice",
+        "reference process": (mfp["database"], mfp["code"]),
+    }
+    for key, value in expected.items():
+        assert rice_product[key] == value
+
+    expected = {
+        "allocation": 95.8,
+        "amount": 6250,
+        "functional": True,
+        "input": (rice_product["database"], rice_product["code"]),
+        "type": "production",
+        "unit": "kg",
+    }
+
+    exc = rice_process["exchanges"][-1]
+    for key, value in expected.items():
+        assert exc[key] == value
+
+    straw_product = given["products"][2]
+    expected = {
+        "comment": r"The amount of straw is calculated from the straw to grain-ratio of 1 and a straw harvest rate of 50%. Economic allocation is based on the assumption that grains account for 92% and straw for 8% of the price.",
+        "database": mfp["database"],
+        "line_no": 209,
+        "name": "Rice straw, at farm (WFLDB 3.0)/IN U",
+        "properties": {"manual_allocation": 4.2},
+        "simapro_project": mfp["simapro_project"],
+        "tags": mfp["tags"],
+        "type": "product",
+        "unit": "kg",
+        "waste_type": "not defined",
+        "category": "_WFLDB 3.0\\Plant products\\Arable\\Rice",
+        "reference process": (mfp["database"], mfp["code"]),
+    }
+    for key, value in expected.items():
+        assert straw_product[key] == value
+
+    expected = {
+        "allocation": 4.2,
+        "amount": 3125,
+        "functional": True,
+        "input": (straw_product["database"], straw_product["code"]),
+        "type": "production",
+        "unit": "kg",
+    }
+
+    exc = straw_process["exchanges"][-1]
+    for key, value in expected.items():
+        assert exc[key] == value
+
+
+def test_no_product_creation(fixtures_dir):
+    given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway(separate_products=False)
+
+    assert len(given["processes"]) == 4
+    assert len(given["products"]) == 0