diff --git a/CHANGES.md b/CHANGES.md index 5a93a21..5b5cdc5 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # `bw_simapro_csv` Changelog +### [DEV] + +* Add ability to separate products as separate nodes when exporting to Brightway +* BREAKING CHANGE: Default is now to separate products as separate nodes when exporting to Brightway + ### [0.2.6] - 2024-09-10 * Packaging fix diff --git a/bw_simapro_csv/brightway.py b/bw_simapro_csv/brightway.py index 2d22bb8..ada82e9 100644 --- a/bw_simapro_csv/brightway.py +++ b/bw_simapro_csv/brightway.py @@ -1,4 +1,5 @@ import datetime +import itertools from copy import deepcopy from typing import Union from uuid import uuid4 @@ -67,6 +68,14 @@ def allocation_as_manual_property(exc: dict) -> dict: def name_for_process(process: Process, missing_string: str) -> str: """Try several ways to generate a sensible name.""" + + def clean_name(name: str) -> str: + """Cleanup awkward name endings if needed.""" + name = name.strip() + if name.endswith(","): + name = name[:-1] + return name + if given_name := substitute_unspecified(process.parsed["metadata"].get("Process name")): return given_name if "Products" in process.blocks: @@ -74,17 +83,49 @@ def name_for_process(process: Process, missing_string: str) -> str: if len(names) == 1: return names[0] else: - return "MFP: {}".format("⧺".join([name[:25] for name in names])) + return clean_name("MFP: {}".format("⧺".join([name[:25] for name in names]))) if "Waste treatment" in process.blocks: names = [edge["name"] for edge in process.blocks["Waste treatment"].parsed] if len(names) == 1: return names[0] else: - return "MFP: {}".format("⧺".join([name[:25] for name in names])) + return clean_name("MFP: {}".format("⧺".join([name[:25] for name in names]))) return missing_string -def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> dict: +def as_product_dct(edge: dict, node: dict) -> dict: + """Take an edge on a node and generate a new product node.""" + NODE_ATTRS = 
("name", "unit", "simapro_project", "location", "tags", "database", "comment") + EDGE_ATTRS = ( + "name", + "unit", + "line_no", + "category", + "waste_type", + "comment", + "properties", + "simapro_category", + ) + return ( + { + "type": "product", + "code": uuid4().hex, + "reference process": (node["database"], node["code"]), + } + | {key: node[key] for key in NODE_ATTRS if node.get(key)} + | {key: edge[key] for key in EDGE_ATTRS if edge.get(key)} + ) + + +def reference_to_product(process_edge: dict, product: dict) -> dict: + """Add explicit link from process edge to new product node""" + process_edge["input"] = (product["database"], product["code"]) + return process_edge + + +def lci_to_brightway( + spcsv: SimaProCSV, missing_string: str = "(unknown)", separate_products: bool = False +) -> dict: """Turn an extracted SimaPro CSV extract into metadata that can be imported into Brightway. Doesn't do any normalization or other data changes, just reorganizes the existing data.""" @@ -102,6 +143,7 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di # Note reversing of database and project terms here # In SimaPro, the project is lower priority than the database # but in Brightway it's the opposite. 
+ "products": [], "project_parameters": [ param for block in spcsv.blocks @@ -206,14 +248,26 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di process_dataset["exchanges"].append(edge | {"type": "biosphere"}) if "Products" in process.blocks: for edge in process.blocks["Products"].parsed: - process_dataset["exchanges"].append( - allocation_as_manual_property(edge | {"type": "production", "functional": True}) + production_dct = allocation_as_manual_property( + edge | {"type": "production", "functional": True} ) + if separate_products: + product_dct = as_product_dct(production_dct, process_dataset) + data["products"].append(product_dct) + process_dataset["exchanges"].append( + reference_to_product(production_dct, product_dct) + ) + else: + process_dataset["exchanges"].append(production_dct) elif "Waste treatment" in process.blocks: for edge in process.blocks["Waste treatment"].parsed: - process_dataset["exchanges"].append( - edge | {"type": "technosphere", "functional": True} - ) + waste_edge = edge | {"type": "technosphere", "functional": True} + if separate_products: + waste_dct = as_product_dct(waste_edge, process_dataset) + data["products"].append(waste_dct) + process_dataset["exchanges"].append(reference_to_product(waste_edge, waste_dct)) + else: + process_dataset["exchanges"].append(waste_edge) if not any(e for e in process_dataset["exchanges"] if e["type"] == "production"): dummy = deepcopy(edge) dummy.update( @@ -232,8 +286,9 @@ def lci_to_brightway(spcsv: SimaProCSV, missing_string: str = "(unknown)") -> di sum(1 for exc in ds.get("exchanges") if exc.get("functional")) > 1 for ds in data["processes"] ): - formatted = {(spcsv.database_name, ds["code"]): ds for ds in data["processes"]} - as_dict = allocation_before_writing(formatted, "manual_allocation") + as_dict = allocation_before_writing( + {(spcsv.database_name, ds["code"]): ds for ds in data["processes"]}, "manual_allocation" + ) for (database, code), ds in 
as_dict.items(): ds["code"] = code ds["database"] = database diff --git a/bw_simapro_csv/main.py b/bw_simapro_csv/main.py index 898e80f..fa5a47a 100644 --- a/bw_simapro_csv/main.py +++ b/bw_simapro_csv/main.py @@ -182,11 +182,13 @@ def __init__( def __iter__(self): return iter(self.blocks) - def to_brightway(self, filepath: Optional[Path] = None) -> Union[dict, Path]: + def to_brightway( + self, filepath: Optional[Path] = None, separate_products: bool = True + ) -> Union[dict, Path]: if self.header["kind"] == SimaProCSVType.processes: from .brightway import lci_to_brightway - data = lci_to_brightway(self) + data = lci_to_brightway(self, separate_products=separate_products) if filepath is not None: with open(filepath, "w") as f: json.dump(data, f, indent=2, ensure_ascii=False, default=json_serializer) diff --git a/pyproject.toml b/pyproject.toml index b06b283..991fa3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ norecursedirs = [ "build", ".tox" ] -testpaths = ["tests/*.py", "tests/blocks/*.py", "tests/integration/*.py"] +testpaths = ["tests/*.py", "tests/blocks/*.py", "tests/integration/*.py", "tests/unit/*.py"] [tool.flake8] # Some sane defaults for the code style checker flake8 diff --git a/tests/integration/test_brightway_export_allocation.py b/tests/integration/test_brightway_export_allocation.py index 2d07236..08b89a7 100644 --- a/tests/integration/test_brightway_export_allocation.py +++ b/tests/integration/test_brightway_export_allocation.py @@ -4,7 +4,7 @@ def test_basic_header_extraction(fixtures_dir): - given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway() + given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway(separate_products=False) expected = { "created": "2016-10-12T22:54:47", "name": "Bobs_burgers", diff --git a/tests/integration/test_brightway_product_creation.py b/tests/integration/test_brightway_product_creation.py new file mode 100644 index 0000000..e0b97cc --- /dev/null +++ 
b/tests/integration/test_brightway_product_creation.py @@ -0,0 +1,165 @@ +from bw_simapro_csv import SimaProCSV + + +def test_basic_product_creation(fixtures_dir): + given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway() + + assert len(given["processes"]) == 4 + assert len(given["products"]) == 3 + + residue_process = given["processes"][0] + expected = { + "code": "ReCenter000033915300046", + "name": "Agricultural residues, non-mechanized, sun dried, at farm, 1 kg dry matter (WFLDB 3.0)/GLO U", + "type": "process", + } + for key, value in expected.items(): + assert residue_process[key] == value + + mfp = given["processes"][1] + expected = { + "code": "ReCenter000033915302504", + "name": "MFP: Rice, at farm (WFLDB 3.0)⧺Rice straw, at farm (WFLD", + "type": "multifunctional", + } + for key, value in expected.items(): + assert mfp[key] == value + + rice_process = given["processes"][2] + expected = { + "mf_parent_key": ("Bobs_burgers", mfp["code"]), + "name": "Rice, at farm (WFLDB 3.0)/IN U (read-only process)", + "reference product": "Rice, at farm (WFLDB 3.0)/IN U", + "type": "readonly_process", + "unit": "kg", + } + for key, value in expected.items(): + assert rice_process[key] == value + + straw_process = given["processes"][3] + expected = { + "comment": "Stuff happened, and then we enjoyed life outside the office", + "mf_parent_key": ("Bobs_burgers", mfp["code"]), + "name": "Rice straw, at farm (WFLDB 3.0)/IN U (read-only process)", + "reference product": "Rice straw, at farm (WFLDB 3.0)/IN U", + "type": "readonly_process", + } + for key, value in expected.items(): + assert straw_process[key] == value + + common = ( + "comment", + "data_entry", + "data_generator", + "database", + "location", + "mf_allocation_run_uuid", + "mf_strategy_label", + "publication_date", + "references", + "tags", + "simapro_project", + ) + for label in common: + assert rice_process[label] == mfp[label] + assert straw_process[label] == mfp[label] + + residue_product = 
given["products"][0] + expected = { + "comment": residue_process["comment"], + "database": residue_process["database"], + "line_no": 100, + "name": "Agricultural residues, non-mechanized, sun dried, at farm, 1 kg dry matter (WFLDB 3.0)/GLO U", + "properties": {"manual_allocation": 100.0}, + "simapro_project": residue_process["simapro_project"], + "tags": residue_process["tags"], + "type": "product", + "unit": "kg", + "waste_type": "Compost", + "category": "_WFLDB 3.0\\_sub-datasets\\Animal production\\Feed", + "reference process": (residue_process["database"], residue_process["code"]), + } + for key, value in expected.items(): + assert residue_product[key] == value + + expected = { + "allocation": 100, + "amount": 1, + "functional": True, + "input": (residue_product["database"], residue_product["code"]), + "type": "production", + "unit": "kg", + } + + exc = residue_process["exchanges"][-1] + for key, value in expected.items(): + assert exc[key] == value + + rice_product = given["products"][1] + expected = { + "comment": "INDIA", + "database": mfp["database"], + "line_no": 208, + "name": "Rice, at farm (WFLDB 3.0)/IN U", + "properties": {"manual_allocation": 95.8}, + "simapro_project": mfp["simapro_project"], + "tags": mfp["tags"], + "type": "product", + "unit": "kg", + "waste_type": "not defined", + "category": "_WFLDB 3.0\\Plant products\\Arable\\Rice", + "reference process": (mfp["database"], mfp["code"]), + } + for key, value in expected.items(): + assert rice_product[key] == value + + expected = { + "allocation": 95.8, + "amount": 6250, + "functional": True, + "input": (rice_product["database"], rice_product["code"]), + "type": "production", + "unit": "kg", + } + + exc = rice_process["exchanges"][-1] + for key, value in expected.items(): + assert exc[key] == value + + straw_product = given["products"][2] + expected = { + "comment": r"The amount of straw is calculated from the straw to grain-ratio of 1 and a straw harvest rate of 50%. 
Economic allocation is based on the assumption that grains account for 92% and straw for 8% of the price.", + "database": mfp["database"], + "line_no": 209, + "name": "Rice straw, at farm (WFLDB 3.0)/IN U", + "properties": {"manual_allocation": 4.2}, + "simapro_project": mfp["simapro_project"], + "tags": mfp["tags"], + "type": "product", + "unit": "kg", + "waste_type": "not defined", + "category": "_WFLDB 3.0\\Plant products\\Arable\\Rice", + "reference process": (mfp["database"], mfp["code"]), + } + for key, value in expected.items(): + assert straw_product[key] == value + + expected = { + "allocation": 4.2, + "amount": 3125, + "functional": True, + "input": (straw_product["database"], straw_product["code"]), + "type": "production", + "unit": "kg", + } + + exc = straw_process["exchanges"][-1] + for key, value in expected.items(): + assert exc[key] == value + + +def test_no_product_creation(fixtures_dir): + given = SimaProCSV(fixtures_dir / "allocation.csv").to_brightway(separate_products=False) + + assert len(given["processes"]) == 4 + assert len(given["products"]) == 0 diff --git a/tests/unit/test_brightway_utils.py b/tests/unit/test_brightway_utils.py new file mode 100644 index 0000000..d6b66e9 --- /dev/null +++ b/tests/unit/test_brightway_utils.py @@ -0,0 +1,105 @@ +from bw_simapro_csv.blocks import Process +from bw_simapro_csv.brightway import name_for_process + + +def test_name_for_process_given_name(): + class P(Process): + def __init__(self): + pass + + p = P() + p.parsed = {"metadata": {"Process name": "foo"}} + + assert name_for_process(p, "bar") == "foo" + + +def test_name_for_process_given_name_unspecified(): + class P(Process): + def __init__(self): + pass + + p = P() + p.blocks = {} + p.parsed = {"metadata": {"Process name": "unspecified"}} + assert name_for_process(p, "bar") == "bar" + + p.parsed = {"metadata": {"Process name": "UNSPECIFIED"}} + assert name_for_process(p, "bar") == "bar" + + +def test_name_for_process_products(): + class Dummy: + 
pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [{"name": "foo"}, {"name": "bar"}]
+
+    p = P()
+    p.blocks = {"Products": o}
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: foo⧺bar"
+
+
+def test_name_for_process_products_clean_name():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [
+        {"name": r"Albacore, fillet, raw, at processing {FR} U"},
+        {"name": r"Albacore, residues, raw, at processing {FR} U"},
+    ]
+
+    p = P()
+    p.blocks = {"Products": o}  # exercise the "Products" branch of name_for_process
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: Albacore, fillet, raw, at⧺Albacore, residues, raw"
+
+
+def test_name_for_waste_treatment_products():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [{"name": "foo"}, {"name": "bar"}]
+
+    p = P()
+    p.blocks = {"Waste treatment": o}  # exercise the "Waste treatment" branch
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: foo⧺bar"
+
+
+def test_name_for_waste_treatment_products_clean_name():
+    class Dummy:
+        pass
+
+    class P(Process):
+        def __init__(self):
+            pass
+
+    o = Dummy()
+    o.parsed = [
+        {"name": r"Albacore, fillet, raw, at processing {FR} U"},
+        {"name": r"Albacore, residues, raw, at processing {FR} U"},
+    ]
+
+    p = P()
+    p.blocks = {"Waste treatment": o}
+    p.parsed = {"metadata": {}}
+
+    assert name_for_process(p, "no") == "MFP: Albacore, fillet, raw, at⧺Albacore, residues, raw"