From 538d6aeee349f577746adfc8b136c670a21a9c4a Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 28 Aug 2024 20:07:12 +0100 Subject: [PATCH 01/15] Add composition utility module --- smact/utils/composition.py | 47 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 smact/utils/composition.py diff --git a/smact/utils/composition.py b/smact/utils/composition.py new file mode 100644 index 0000000..a5091e4 --- /dev/null +++ b/smact/utils/composition.py @@ -0,0 +1,47 @@ +"""Utility functioms for handling elements, species, formulas and composition""" +import re +from collections import defaultdict + + +# Adapted from ElementEmbeddings and Pymatgen +def parse_formula(formula: str) -> dict[str, int]: + """Parse a formula into a dict of el:amt + + Args: + formula (str): Chemical formula + + Returns: + dict: Dictionary of element symbol: amount + """ + regex = r"\(([^\(\)]+)\)\s*([\.e\d]*)" + r = re.compile(regex) + m = re.search(r, formula) + if m: + factor = 1.0 + if m.group(2) != "": + factor = float(m.group(2)) + unit_sym_dict = _get_sym_dict(m.group(1), factor) + expanded_sym = "".join( + [f"{el}{amt}" for el, amt in unit_sym_dict.items()] + ) + expanded_formula = formula.replace(m.group(), expanded_sym) + return parse_formula(expanded_formula) + return _get_sym_dict(formula, 1) + + +def _get_sym_dict(formula: str, factor: float) -> dict[str, float]: + sym_dict: dict[str, float] = defaultdict(float) + regex = r"([A-Z][a-z]*)\s*([-*\.e\d]*)" + r = re.compile(regex) + for m in re.finditer(r, formula): + el = m.group(1) + amt = 1.0 + if m.group(2).strip() != "": + amt = float(m.group(2)) + sym_dict[el] += amt * factor + formula = formula.replace(m.group(), "", 1) + if formula.strip(): + msg = f"{formula} is an invalid formula" + raise ValueError(msg) + + return sym_dict From e73ca254b4b0e38fe89f0159738fe22df854327f Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 28 Aug 2024 20:07:55 +0100 Subject: [PATCH 02/15] Add test for parsing formulas --- smact/tests/test_utils.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 smact/tests/test_utils.py diff --git a/smact/tests/test_utils.py b/smact/tests/test_utils.py new file mode 100644 index 0000000..7df1ebb --- /dev/null +++ b/smact/tests/test_utils.py @@ -0,0 +1,32 @@ +import unittest + +from smact.utils.composition import parse_formula + + +class TestComposition(unittest.TestCase): + """Test composition utilities""" + + def test_parse_formula(self): + formulas = ["Li10GeP2S12", "Mg0.5O0.5", "CaMg(CO3)2"] + + LGPS = parse_formula(formulas[0]) + self.assertIsInstance(LGPS, dict) + for el_sym, ammt in LGPS.items(): + self.assertIsInstance(el_sym, str) + self.assertIsInstance(ammt, float) + self.assertEqual(LGPS["Li"], 10) + self.assertEqual(LGPS["Ge"], 1) + self.assertEqual(LGPS["P"], 2) + self.assertEqual(LGPS["S"], 12) + + MgO = parse_formula(formulas[1]) + self.assertIsInstance(MgO, dict) + self.assertEqual(MgO["Mg"], 0.5) + self.assertEqual(MgO["O"], 0.5) + + dolomite = parse_formula(formulas[2]) + self.assertIsInstance(dolomite, dict) + self.assertEqual(dolomite["Ca"], 1) + self.assertEqual(dolomite["Mg"], 1) + self.assertEqual(dolomite["C"], 2) + self.assertEqual(dolomite["O"], 6) From 44927f5821fb43fbf4783c06ab24d967877531bd Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 28 Aug 2024 20:08:30 +0100 Subject: [PATCH 03/15] Remove parser function defined within a function --- smact/properties.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/smact/properties.py b/smact/properties.py index 8b2abf5..3d39247 100644 --- a/smact/properties.py +++ b/smact/properties.py @@ -6,6 +6,7 @@ import numpy as np import smact +from smact.utils.composition import parse_formula def eneg_mulliken(element: Union[smact.Element, str]) -> float: @@ -179,16 +180,6 @@ def valence_electron_count(compound: str) -> float: Raises: ValueError: If an element in the compound is not found in the valence data. """ - from typing import Dict - - def parse_formula(formula: str) -> Dict[str, int]: - pattern = re.compile(r"([A-Z][a-z]*)(\d*)") - elements = pattern.findall(formula) - element_stoich: Dict[str, int] = defaultdict(int) - for element, count in elements: - count = int(count) if count else 1 - element_stoich[element] += count - return element_stoich def get_element_valence(element: str) -> int: try: From bbf35d49fdb1ed50cf2a98681a21366eb91192ba Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Wed, 28 Aug 2024 20:11:33 +0100 Subject: [PATCH 04/15] Add contributing guide --- CONTRIBUTING.md | 51 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..58165ab --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,51 @@ +# Contributing + +This is a quick guide on how to follow best practice and contribute smoothly to `SMACT`. + +## Workflow + +We follow the [GitHub flow] +(), using +branches for new work and pull requests for verifying the work. + +The steps for a new piece of work can be summarised as follows: + +1. Push up or create [an issue](https://github.com/WMD-group/SMACT/issues). +2. Create a branch from main, with a sensible name that relates to the issue. +3. Do the work and commit changes to the branch. Push the branch + regularly to GitHub to make sure no work is accidentally lost. +4. Write or update unit tests for the code you work on. +5. When you are finished with the work, ensure that all of the unit + tests pass on your own machine. +6. Open a pull request [on the pull request page](https://github.com/WMD-group/SMACT/pulls). +7. If nobody acknowledges your pull request promptly, feel free to poke one of the main developers into action. + +## Pull requests + +For a general overview of using pull requests on GitHub look [in the GitHub docs](https://help.github.com/en/articles/about-pull-requests). + +When creating a pull request you should: + +- Ensure that the title succinctly describes the changes so it is easy to read on the overview page +- Reference the issue which the pull request is closing + +Recommended reading: [How to Write the Perfect Pull Request](https://github.blog/2015-01-21-how-to-write-the-perfect-pull-request/) + +## Dev requirements + +When developing locally, it is recommended to install the python packages in `requirements-dev.txt`. + +```bash +pip install -r requirements-dev.txt +``` + +This will allow you to run the tests locally with pytest as described in the main README, +as well as run pre-commit hooks to automatically format python files with isort and black. +To install the pre-commit hooks (only needs to be done once): + +```bash +pre-commit install +pre-commit run --all-files # optionally run hooks on all files +``` + +Pre-commit hooks will check all files when you commit changes, automatically fixing any files which are not formatted correctly. Those files will need to be staged again before re-attempting the commit. From 6dbda8b1cfdaa35860d5c02a9326dc5e0e5a39c1 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 11:48:55 +0100 Subject: [PATCH 05/15] Add form_maker and comp_maker to utils --- smact/utils/composition.py | 48 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/smact/utils/composition.py b/smact/utils/composition.py index a5091e4..284fd59 100644 --- a/smact/utils/composition.py +++ b/smact/utils/composition.py @@ -2,6 +2,10 @@ import re from collections import defaultdict +from pymatgen.core import Composition + +from smact.structure_prediction.utilities import unparse_spec + # Adapted from ElementEmbeddings and Pymatgen def parse_formula(formula: str) -> dict[str, int]: @@ -45,3 +49,47 @@ def _get_sym_dict(formula: str, factor: float) -> dict[str, float]: raise ValueError(msg) return sym_dict + + +def comp_maker( + smact_filter_output: tuple[str, int, int] | tuple[str, int] +) -> Composition: + """Convert an output of smact.screening.smact_filer into a Pymatgen Compositions + + Args: + smact_filter_output (tuple[str, int, int]|tuple[str, int]): An item in the list returned from smact_filter + + Returns: + composition (pymatgen.core.Composition): An instance of the Composition class + """ + if len(smact_filter_output) == 2: + form = [] + for el, ammt in zip(smact_filter_output[0], smact_filter_output[-1]): + form.append(el) + form.append(ammt) + form = "".join(str(e) for e in form) + else: + form = {} + for el, ox, ammt in zip( + smact_filter_output[0], + smact_filter_output[1], + smact_filter_output[2], + ): + sp = unparse_spec((el, ox)) + form[sp] = ammt + return Composition(form) + + +def formula_maker( + smact_filter_output: tuple[str, int, int] | tuple[str, int] +) -> str: + """Convert an output of smact.screening.smact_filter into a formula. + + Args: + smact_filter_output (tuple[str, int, int]|tuple[str, int]): An item in the list returned from smact_filter + + Returns: + formula (str): A formula + + """ + return comp_maker(smact_filter_output).reduced_formula From df08b83b2061e12efbb5075f47f6d4ba312e6dd1 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 11:49:10 +0100 Subject: [PATCH 06/15] Add tests for comp_maker and form_maker --- smact/tests/test_utils.py | 46 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/smact/tests/test_utils.py b/smact/tests/test_utils.py index 7df1ebb..aad99f6 100644 --- a/smact/tests/test_utils.py +++ b/smact/tests/test_utils.py @@ -1,12 +1,29 @@ import unittest -from smact.utils.composition import parse_formula +from pymatgen.core import Composition + +from smact import Element +from smact.screening import smact_filter +from smact.utils.composition import comp_maker, formula_maker, parse_formula class TestComposition(unittest.TestCase): """Test composition utilities""" + def setUp(self) -> None: + self.mock_filter_output = [ + (("Fe", "O"), (2, -2), (1, 1)), + (("Fe", "O"), (1, 1)), + (("Fe", "Fe", "O"), (2, 3, -2), (1, 2, 4)), + ] + self.smact_filter_output = smact_filter( + els=[Element("Li"), Element("Ge"), Element("P"), Element("S")], + stoichs=[[10], [1], [2], [12]], + ) + def test_parse_formula(self): + """Test the parse_formula function""" + formulas = ["Li10GeP2S12", "Mg0.5O0.5", "CaMg(CO3)2"] LGPS = parse_formula(formulas[0]) @@ -30,3 +47,30 @@ def test_parse_formula(self): self.assertEqual(dolomite["Mg"], 1) self.assertEqual(dolomite["C"], 2) self.assertEqual(dolomite["O"], 6) + + def test_comp_maker(self): + """Test the comp_maker function""" + comp1 = comp_maker(self.mock_filter_output[0]) + comp2 = comp_maker(self.mock_filter_output[1]) + comp3 = comp_maker(self.mock_filter_output[2]) + comp4 = comp_maker(self.smact_filter_output[1]) + for comp in [comp1, comp2, comp3, comp4]: + self.assertIsInstance(comp, Composition) + self.assertEqual(Composition("FeO"), comp2) + self.assertEqual(Composition({"Fe2+": 1, "O2-": 1}), comp1) + self.assertEqual(Composition({"Fe2+": 1, "Fe3+": 2, "O2-": 4}), comp3) + self.assertEqual( + Composition({"Li+": 10, "Ge4+": 1, "P5+": 2, "S2-": 12}), comp4 + ) + + def test_formula_maker(self): + """Test the formula_maker function""" + form1 = formula_maker(self.mock_filter_output[0]) + form2 = formula_maker(self.mock_filter_output[1]) + form3 = formula_maker(self.mock_filter_output[2]) + form4 = formula_maker(self.smact_filter_output[1]) + self.assertEqual(form1, "FeO") + self.assertEqual(form2, "FeO") + self.assertEqual(form1, form2) + self.assertEqual(form3, "Fe3O4") + self.assertEqual(form4, "Li10Ge(PS6)2") From 508f78af9f51d52b73af89eba6c60975d990058f Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 11:49:20 +0100 Subject: [PATCH 07/15] Fix link --- CONTRIBUTING.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 58165ab..7298200 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,8 +4,7 @@ This is a quick guide on how to follow best practice and contribute smoothly to ## Workflow -We follow the [GitHub flow] -(), using +We follow the [GitHub flow](), using branches for new work and pull requests for verifying the work. The steps for a new piece of work can be summarised as follows: From 92fca9fe40a5974fd48e561d19381652e70bea0c Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 12:21:51 +0100 Subject: [PATCH 08/15] Add __future__ import --- smact/utils/composition.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/smact/utils/composition.py b/smact/utils/composition.py index 284fd59..1b04119 100644 --- a/smact/utils/composition.py +++ b/smact/utils/composition.py @@ -1,4 +1,6 @@ """Utility functioms for handling elements, species, formulas and composition""" +from __future__ import annotations + import re from collections import defaultdict From 4bf54cef8fb36fa1a559d356a1b12cd3ce51a028 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 12:22:17 +0100 Subject: [PATCH 09/15] Correct the type hint --- smact/utils/composition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smact/utils/composition.py b/smact/utils/composition.py index 1b04119..b833bc0 100644 --- a/smact/utils/composition.py +++ b/smact/utils/composition.py @@ -10,7 +10,7 @@ # Adapted from ElementEmbeddings and Pymatgen -def parse_formula(formula: str) -> dict[str, int]: +def parse_formula(formula: str) -> dict[str, float]: """Parse a formula into a dict of el:amt Args: From 87e4f612144cf92b444ae0fe6097b04503e2aec0 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 13:37:40 +0100 Subject: [PATCH 10/15] Add __init__.py in utils --- smact/utils/__init__.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 smact/utils/__init__.py diff --git a/smact/utils/__init__.py b/smact/utils/__init__.py new file mode 100644 index 0000000..bf0ca3f --- /dev/null +++ b/smact/utils/__init__.py @@ -0,0 +1 @@ +"""Utility functions for SMACT.""" \ No newline at end of file From 7a8a13b4c9e3d5f62b84901e1c6e7989bfe32d9d Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 13:37:54 +0100 Subject: [PATCH 11/15] Run pre-commit --- smact/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/smact/utils/__init__.py b/smact/utils/__init__.py index bf0ca3f..bdf9739 100644 --- a/smact/utils/__init__.py +++ b/smact/utils/__init__.py @@ -1 +1 @@ -"""Utility functions for SMACT.""" \ No newline at end of file +"""Utility functions for SMACT.""" From b06704808cd76005303c0beff0f7231b6048e9be Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 13:40:54 +0100 Subject: [PATCH 12/15] Add utils to packages in setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 421b713..1b786a3 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ license="MIT", packages=[ "smact", + "smact.utils", "smact.tests", "smact.structure_prediction", "smact.dopant_prediction", From e34abb8e19e437014ed951ac8a6ad0428fac6afb Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 14:22:32 +0100 Subject: [PATCH 13/15] Add docs for utils --- docs/smact.utils.compositions.rst | 9 +++++++++ docs/smact.utils.rst | 11 +++++++++++ 2 files changed, 20 insertions(+) create mode 100644 docs/smact.utils.compositions.rst create mode 100644 docs/smact.utils.rst diff --git a/docs/smact.utils.compositions.rst b/docs/smact.utils.compositions.rst new file mode 100644 index 0000000..68f87a7 --- /dev/null +++ b/docs/smact.utils.compositions.rst @@ -0,0 +1,9 @@ +SMACT Utilities Composition Module +===================================== + +Miscellaneous utilities for composition handling + +.. automodule:: smact.utils.composition + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/smact.utils.rst b/docs/smact.utils.rst new file mode 100644 index 0000000..06d0a86 --- /dev/null +++ b/docs/smact.utils.rst @@ -0,0 +1,11 @@ +SMACT Utilities module +=========================== + +The utilities module provides some utilty functions to support the core functionalities of SMACT + +Submodules +---------- + +.. toctree:: + + smact.utils.composition From 487247a11482e5538addb7b11231897d89ab6749 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 14:23:08 +0100 Subject: [PATCH 14/15] Fix name --- .../{smact.utils.compositions.rst => smact.utils.composition.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename docs/{smact.utils.compositions.rst => smact.utils.composition.rst} (100%) diff --git a/docs/smact.utils.compositions.rst b/docs/smact.utils.composition.rst similarity index 100% rename from docs/smact.utils.compositions.rst rename to docs/smact.utils.composition.rst From 83507d80ed6b163cca8dfe2843bc167d9d482fc1 Mon Sep 17 00:00:00 2001 From: Anthony Onwuli Date: Thu, 29 Aug 2024 15:05:30 +0100 Subject: [PATCH 15/15] Add utils to docs --- docs/smact.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/smact.rst b/docs/smact.rst index 215effd..8278377 100644 --- a/docs/smact.rst +++ b/docs/smact.rst @@ -25,6 +25,7 @@ Submodules smact.structure_prediction smact.dopant_prediction + smact.utils smact.properties smact.screening smact.oxidation_states