Merge pull request #192 from hjkgrp/additional_tests

Additional tests
hjkgrp · Feb 26, 2024 · 292cc44 · 292cc44
2 parents 9481fd8 + e8dbfdc
commit 292cc44
Show file tree

Hide file tree

Showing 66 changed files with 3,871 additions and 1,200 deletions.
diff --git a/molSimplify/Informatics/MOF/.ipynb_checkpoints/MOF_RAC_example-checkpoint.ipynb b/molSimplify/Informatics/MOF/.ipynb_checkpoints/MOF_RAC_example-checkpoint.ipynb
diff --git a/molSimplify/Informatics/MOF/MOF_descriptors.py b/molSimplify/Informatics/MOF/MOF_descriptors.py
diff --git a/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py b/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional.py
@@ -23,11 +23,15 @@
 # This MOF RAC generator assumes that pymatgen is installed.                            #
 # Pymatgen is used to get the primitive cell.                                           #
 #########################################################################################
-from pymatgen.io.cif import CifParser
+
+
 def get_primitive(datapath, writepath):
+    # Parse the CIF file at datapath, take the first structure it contains,
+    # reduce it to its primitive cell and write that cell to writepath.
+    # pymatgen is imported inside the function, so the module itself can be
+    # imported without pymatgen being available.
+    from pymatgen.io.cif import CifParser
     s = CifParser(datapath, occupancy_tolerance=1).get_structures()[0]
     sprim = s.get_primitive_structure()
+    # NOTE(review): the arguments are passed positionally as (format, path);
+    # confirm this order against the installed pymatgen's Structure.to().
-    sprim.to("cif",writepath)
+    sprim.to("cif", writepath)
+
+
 '''<<<< END OF CODE TO COMPUTE PRIMITIVE UNIT CELLS >>>>'''
 
 #########################################################################################

diff --git a/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional_2.py b/molSimplify/Informatics/MOF/MOF_descriptors_alternate_functional_2.py
diff --git a/molSimplify/Informatics/MOF/cluster_extraction.py b/molSimplify/Informatics/MOF/cluster_extraction.py
@@ -19,10 +19,10 @@
 # This MOF RAC generator assumes that pymatgen is installed.                            #
 # Pymatgen is used to get the primitive cell.                                           #
 #########################################################################################
-from pymatgen.io.cif import CifParser
 
 
 def get_primitive(datapath, writepath):
+    # Parse the CIF file at datapath, take the first structure it contains,
+    # reduce it to its primitive cell and write that cell to writepath.
+    # pymatgen is imported inside the function, so the module itself can be
+    # imported without pymatgen being available.
+    from pymatgen.io.cif import CifParser
     s = CifParser(datapath, occupancy_tolerance=1).get_structures()[0]
     sprim = s.get_primitive_structure()
+    # NOTE(review): the arguments are passed positionally as (format, path);
+    # confirm this order against the installed pymatgen's Structure.to().
     sprim.to("cif", writepath)

diff --git a/molSimplify/Scripts/io.py b/molSimplify/Scripts/io.py
@@ -115,12 +115,6 @@ def readdict(fname):
 
 
 def readdict_sub(fname):
-    # Constructor
-    #  @param self The object pointer
-    #  @param subname The name of the substrate
-    class substrate:
-        def __init__(self, subname):
-            self.subname = subname
     d = dict()
     with open(fname, 'r') as f:
         txt = f.read()
@@ -137,8 +131,7 @@ def __init__(self, subname):
                     vv.append(vvs)
                 else:
                     vv += vvs
-            # dict keys are instances of the substrate class
-            d[substrate(key)] = vv
+            d[key] = vv
     return d
 
 # Get ligands in dictionary

diff --git a/molSimplify/__main__.py b/molSimplify/__main__.py
@@ -166,7 +166,7 @@ def main(args=None):
             parser = argparse.ArgumentParser(description=DescString_basic,
                                              formatter_class=argparse.RawDescriptionHelpFormatter)
             parseinputs_basic(parser)
-        exit()
+        return
     # ## run with gui ###
     elif gui and len(args) == 0:
         print('molSimplify is starting!')

diff --git a/tests/informatics/test_MOF_descriptors.py b/tests/informatics/test_MOF_descriptors.py
@@ -0,0 +1,128 @@
+import pytest
+import json
+import numpy as np
+import pandas as pd
+from molSimplify.Informatics.MOF.MOF_descriptors import get_MOF_descriptors
+from molSimplify.utils.timer import DebugTimer
+
+
+@pytest.fixture
+def ref_names():
+    """Provide a factory that builds the expected, ordered list of RAC names.
+
+    The returned callable accepts ``depth``, ``Gval`` and ``alpha`` and
+    returns the descriptor names as a list of strings.
+    """
+    def RACs_names(depth=3, Gval=True, alpha=True):
+
+        def generate_names(starts, properties, depth, scope="all"):
+            # Without a scope the trailing "-<scope>" part is left out.
+            suffix = "" if scope is None else f"-{scope}"
+            return [
+                f"{start}-{prop}-{d}{suffix}"
+                for start in starts
+                for prop in properties
+                for d in range(depth + 1)
+            ]
+
+        base_props = ["chi", "Z", "I", "T", "S"] + (["Gval"] if Gval else [])
+        # Only the lc / D_lc / func / D_func starts carry the alpha property
+        extended_props = base_props + (["alpha"] if alpha else [])
+
+        names = generate_names(["f", "mc", "D_mc"], base_props, depth)
+        # f-lig does not include the "scope"
+        names += generate_names(["f-lig"], base_props, depth, scope=None)
+        names += generate_names(
+            ["lc", "D_lc", "func", "D_func"], extended_props, depth)
+        return names
+    return RACs_names
+
+
+@pytest.mark.parametrize(
+    "name",
+    [
+        "odac-21383",
+        "odac-21433",
+        "odac-21478",
+        "odac-21735",
+        "odac-21816",
+    ])
+def test_get_MOF_descriptors_ODAC(resource_path_root, tmpdir, name, ref_names):
+    """
+    Runs get_MOF_descriptors() (depth=3, Gval=True) on a .cif file and compares
+    the returned names and descriptors as well as the written lc/sbu
+    descriptor .csv files to stored references.
+    """
+    # NOTE All the .cif files were converted to primitive unit cell using the
+    # MOF_descriptors.get_primitive() function
+
+    with DebugTimer("get_MOF_descriptors()"):
+        full_names, full_descriptors = get_MOF_descriptors(
+            str(resource_path_root / "inputs" / "cif_files" / f"{name}.cif"),
+            depth=3,
+            path=str(tmpdir),
+            xyzpath=str(tmpdir / "test.xyz"),
+            Gval=True,
+        )
+
+    ref_dir = resource_path_root / "refs" / "MOF_descriptors" / name
+    with open(ref_dir / f"{name}.json", "r") as fin:
+        ref = json.load(fin)
+
+    assert full_names == ref_names()
+    np.testing.assert_allclose(full_descriptors, ref["descriptors"], atol=1e-6)
+
+    # all(df == df_ref) iterates over the column labels of the boolean frame
+    # and therefore passes regardless of the cell values. Use the pandas
+    # frame assertion so that the contents are actually compared.
+    for csv_name in ("lc_descriptors.csv", "sbu_descriptors.csv"):
+        pd.testing.assert_frame_equal(
+            pd.read_csv(tmpdir / csv_name),
+            pd.read_csv(ref_dir / csv_name))
+
+
+@pytest.mark.parametrize(
+    "name",
+    [
+        "FOKYIP_clean",
+        "SETDUS_clean",
+        "UXUPEK_clean",
+        "NEXXIZ_clean",
+        # "ETECIR_clean",  TODO: Figure out why these two examples do not work!
+        # "FAVGUH_clean",  Disagreement on all ligand center RACs
+        "YICDAR_clean",
+        "VONBIK_clean",
+    ])
+def test_get_MOF_descriptors_JACS(resource_path_root, tmpdir, name, ref_names):
+    """
+    Tests a handful of the MOFs used in
+    Nandy et al., J. Am. Chem. Soc. 2021, 143, 42, 17535-17547
+    https://doi.org/10.1021/jacs.1c07217
+
+    Compares the returned names and descriptors (depth=3, Gval=False) as well
+    as the written lc/sbu descriptor .csv files to stored references.
+    """
+    # NOTE All the .cif files were converted to primitive unit cell using the
+    # MOF_descriptors.get_primitive() function
+
+    with DebugTimer("get_MOF_descriptors()"):
+        full_names, full_descriptors = get_MOF_descriptors(
+            str(resource_path_root / "inputs" / "cif_files" / f"{name}.cif"),
+            depth=3,
+            path=str(tmpdir),
+            xyzpath=str(tmpdir / "test.xyz"),
+            Gval=False,
+        )
+
+    ref_dir = resource_path_root / "refs" / "MOF_descriptors" / name
+    with open(ref_dir / f"{name}.json", "r") as fin:
+        ref = json.load(fin)
+
+    # For now we are using a workaround because polarization descriptors
+    # are now added by default.
+    # Here they should be compared to ref_names(Gval=False, alpha=False)
+    assert full_names == ref_names(Gval=False, alpha=True)
+    np.testing.assert_allclose(
+        # Get only the subset of descriptors without the property alpha
+        [d for d, n in zip(full_descriptors, full_names) if "alpha" not in n],
+        ref["descriptors"], atol=1e-6)
+
+    # all(df == df_ref) iterates over the column labels of the boolean frame
+    # and therefore passes regardless of the cell values. Use the pandas
+    # frame assertion so that the contents are actually compared.
+    for csv_name in ("lc_descriptors.csv", "sbu_descriptors.csv"):
+        pd.testing.assert_frame_equal(
+            pd.read_csv(tmpdir / csv_name),
+            pd.read_csv(ref_dir / csv_name))
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -0,0 +1,20 @@
+import pytest
+from helperFuncs import compare_report_new
+from molSimplify.__main__ import main
+
+
+def test_main_empty(tmpdir, resource_path_root):
+    """Call main() with only -rundir set and compare the report it writes
+    below tmpdir with the stored reference report."""
+    run_name = "fe_oct_2_water_6_s_5"
+    conf_name = f"{run_name}_conf_1"
+    report_name = f"{conf_name}.report"
+
+    main(args=[f"-rundir {tmpdir}"])
+
+    generated = tmpdir / run_name / conf_name / report_name
+    reference = resource_path_root / "refs" / "test_cli" / report_name
+    compare_report_new(generated, reference)
+
+
+@pytest.mark.skip("Test for help not working yet.")
+def test_help(capsys):
+    """Call main() with --help and look for the welcome text on stdout."""
+    expected = "Welcome to molSimplify. Only basic usage is described here."
+    main(args=["--help"])
+    stdout = capsys.readouterr().out
+    # Echo the captured text so it shows up in the pytest failure report
+    print(stdout)
+    assert expected in stdout
diff --git a/tests/test_geometry.py b/tests/test_geometry.py
@@ -0,0 +1,44 @@
+import numpy as np
+from molSimplify.Scripts.geometry import (norm,
+                                          normalize,
+                                          checkplanar,
+                                          dihedral,
+                                          )
+from molSimplify.Classes.mol3D import mol3D
+from molSimplify.Classes.atom3D import atom3D
+
+
+def test_norm():
+    """norm() has to agree with np.linalg.norm() to within 1e-6."""
+    vec = [1.2, -.2, 0.8]
+    expected = np.linalg.norm(vec)
+
+    assert abs(norm(vec) - expected) < 1e-6
+
+
+def test_normalize():
+    """normalize() has to return the vector divided by its np.linalg.norm()."""
+    vec = [1.8, 0.6, -1.8]
+    expected = np.array(vec) / np.linalg.norm(vec)
+
+    np.testing.assert_allclose(normalize(vec), expected, atol=1e-6)
+
+
+def test_checkplanar():
+    """checkplanar() is expected to be falsy for an out-of-plane fourth point
+    and truthy for a coplanar one."""
+    p1 = [0.0, 0.0, 0.0]
+    p2 = [1.2, 0.6, 1.6]
+    p3 = [-1.1, 0.3, 0.8]
+
+    out_of_plane = [0.4, -1.2, -0.3]
+    assert not checkplanar(p1, p2, p3, out_of_plane)
+
+    # Construct a fourth point in the plane of the first three (p2 + p3)
+    in_plane = [0.1, 0.9, 2.4]
+    assert checkplanar(p1, p2, p3, in_plane)
+
+
+def test_dihedral():
+    """Build a mol3D from four dummy atoms and expect dihedral() to return
+    45.0 to within 1e-6."""
+    coordinates = [
+        [0.5, 0.0, 1.2],
+        [0.0, 0.0, 0.0],
+        [0.0, 0.0, 1.0],
+        [0.5, 0.5, -0.2],
+    ]
+    mol = mol3D()
+    for xyz in coordinates:
+        mol.addAtom(atom3D(Sym='X', xyz=xyz))
+
+    angle = dihedral(mol, 0, 1, 2, 3)
+    assert abs(angle - 45.0) < 1e-6
diff --git a/tests/test_io.py b/tests/test_io.py
@@ -1,5 +1,11 @@
 import numpy as np
-from molSimplify.Scripts.io import lig_load
+from molSimplify.Scripts.io import (lig_load,
+                                    core_load,
+                                    printgeoms,
+                                    getsubstrates,
+                                    readdict_sub,
+                                    )
+from importlib_resources import files as resource_files
 
 
 def test_lig_load(resource_path_root):
@@ -18,3 +24,60 @@ def test_lig_load(resource_path_root):
     print(ref.coordsvect())
     np.testing.assert_allclose(mol.coordsvect(), ref.coordsvect())
     assert mol.charge == ref.charge
+
+
+def test_core_load():
+    """Load one .xyz and one .mol core shipped with molSimplify and check the
+    formula of each loaded core."""
+    cases = [
+        ("Cores/ferrcore.xyz", "Fe1F1C10H9"),
+        ("Cores/ferrocene.mol", "Fe1C10H10"),
+    ]
+    for relative_path, expected_formula in cases:
+        file = str(resource_files("molSimplify").joinpath(relative_path))
+        core, emsg = core_load(file)
+        # Assert that the error message is empty
+        assert not emsg
+        core.convert2mol3D()
+        assert core.make_formula(latex=False) == expected_formula
+
+
+def test_printgeoms(capsys):
+    """Compare the text printed by printgeoms() with the expected table of
+    coordination numbers, geometry names and short names."""
+    geometries = [
+        (1, "none", "no"),
+        (2, "linear", "li"),
+        (3, "trigonal_planar", "tpl"),
+        (4, "square_planar", "sqp"),
+        (4, "tetrahedral", "thd"),
+        (5, "square_pyramidal", "spy"),
+        (5, "trigonal_bipyramidal", "tbp"),
+        (6, "octahedral", "oct"),
+        (6, "trigonal_prismatic", "tpr"),
+        (7, "pentagonal_bipyramidal", "pbp"),
+        (8, "square_antiprismatic", "sqap"),
+    ]
+    # Every row ends in a space followed by a newline; the expected output
+    # ends with one additional newline.
+    ref = "".join(
+        f"Coordination: {coord}, geometry: {geom},\t short name: {short} \n"
+        for coord, geom, short in geometries
+    ) + "\n"
+
+    printgeoms()
+    captured = capsys.readouterr()
+    assert captured.out == ref
+
+
+def test_readdict_sub():
+    """Read the substrates dictionary shipped with molSimplify and spot-check
+    the entries stored under the plain string keys "methane" and "ethane"."""
+    dict_path = resource_files("molSimplify").joinpath("Substrates/substrates.dict")
+    expected = {
+        "methane": ['methane.xyz', 'ch4', '1', ['inter'],
+                    ['N'], ['0', '#', 'BDH', '=', '104.9(0.1)']],
+        "ethane": ['ethane.xyz', 'c2h6', '2', ['inter'],
+                   ['B'], ['0', '#', 'BDH', '=', '101.1(0.4)']],
+    }
+
+    sub_dict = readdict_sub(dict_path)
+    for key, entry in expected.items():
+        assert sub_dict[key] == entry
+
+
+def test_getsubstrates():
+    """getsubstrates() is expected to return all substrate names as a single
+    space-separated string."""
+    expected_names = [
+        "acetaldehyde", "acetylene", "benzene", "biphenyl", "bromobenzene",
+        "cumene", "cyclohexene", "dha", "diphenylmethane", "estrogen",
+        "ethanal", "ethane", "ethene", "ethylene", "fluorene",
+        "formaldehyde", "formicacid", "iodobenzene", "methanal", "methane",
+        "methanoicacid", "methanol", "methylazide",
+        "n-quinolinylbutyramidate", "n2", "phenyl", "propane", "propene",
+        "propylene", "propyne", "tert-butane", "toluene", "triazole",
+        "xanthene",
+    ]
+    ref = " ".join(expected_names)
+
+    subs = getsubstrates()
+    assert subs == ref
diff --git a/tests/test_ligand_assign.py b/tests/test_ligand_assign.py
@@ -0,0 +1,58 @@
+from molSimplify.Classes.ligand import ligand_assign, ligand_breakdown
+from molSimplify.Classes.mol3D import mol3D
+
+
+def test_six_monodentate(resource_path_root):
+    """Run ligand_breakdown() and ligand_assign() on an Fe complex with six
+    different ligands and check the axial/equatorial assignment."""
+    input_dir = resource_path_root / "inputs" / "ligand_assign_consistent"
+    xyz_file = (
+        input_dir
+        / "fe_water_ammonia_carbonyl_formaldehyde_hydrogensulfide_hydrocyanide.xyz")
+    mol = mol3D()
+    mol.readfromxyz(xyz_file)
+
+    liglist, ligdents, ligcons = ligand_breakdown(mol, BondedOct=True)
+    assigned = ligand_assign(mol, liglist, ligdents, ligcons)
+    (ax_ligand_list, eq_ligand_list, ax_natoms_list, eq_natoms_list,
+     ax_con_int_list, eq_con_int_list, ax_con_list, eq_con_list,
+     built_ligand_list) = assigned
+
+    def formulas(ligands):
+        return [lig.mol.make_formula(latex=False) for lig in ligands]
+
+    # The formulas asserted below correspond to:
+    # ax_ligands: hydrogensulfide, hydrocyanide
+    # eq_ligands: water, ammonia, carbonyl, formaldehyde
+    assert formulas(ax_ligand_list) == ['S1H2', 'N1C1H1']
+    assert formulas(eq_ligand_list) == ['O1H2', 'N1H3', 'O1C1', 'O1C1H2']
+
+    assert ax_natoms_list == [3, 3]
+    assert eq_natoms_list == [3, 4, 2, 4]
+
+    assert ax_con_int_list == [[0], [1]]
+    assert eq_con_int_list == [[0], [0], [0], [1]]
+
+    assert ax_con_list == [[14], [18]]
+    assert eq_con_list == [[1], [4], [8], [11]]
+
+
+def test_triple_bidentate(resource_path_root):
+    """Run ligand_breakdown() and ligand_assign() on an Fe complex with three
+    bidentate ligands and check the axial/equatorial assignment."""
+    input_dir = resource_path_root / "inputs" / "ligand_assign_consistent"
+    xyz_file = (input_dir / "fe_acac_bipy_bipy.xyz")
+    mol = mol3D()
+    mol.readfromxyz(xyz_file)
+
+    liglist, ligdents, ligcons = ligand_breakdown(mol, BondedOct=True)
+    assigned = ligand_assign(mol, liglist, ligdents, ligcons)
+    (ax_ligand_list, eq_ligand_list, ax_natoms_list, eq_natoms_list,
+     ax_con_int_list, eq_con_int_list, ax_con_list, eq_con_list,
+     built_ligand_list) = assigned
+
+    def formulas(ligands):
+        return [lig.mol.make_formula(latex=False) for lig in ligands]
+
+    # Printed so the assigned ligands show up in the pytest failure report
+    print(ax_ligand_list, eq_ligand_list)
+    assert formulas(ax_ligand_list) == ['O2C5H7']
+    assert formulas(eq_ligand_list) == ['N2C10H8', 'N2C10H8']
+
+    assert ax_natoms_list == [14]
+    assert eq_natoms_list == [20, 20]
+
+    assert ax_con_int_list == [[0, 5]]
+    assert eq_con_int_list == [[0, 1], [0, 1]]
+
+    assert ax_con_list == [[1, 6]]
+    assert eq_con_list == [[15, 16], [35, 36]]
+    assert eq_con_list == [[15, 16], [35, 36]]