From abebd386395b3d61956da4ea5a4d546e7299fac6 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Tue, 16 Jul 2024 21:56:57 +0200
Subject: [PATCH 1/6] make sure graph hash is always annotated

---
 polyply/src/generate_templates.py        | 1 +
 polyply/tests/test_generate_templates.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
index ca7df079..0dd06fd2 100644
--- a/polyply/src/generate_templates.py
+++ b/polyply/src/generate_templates.py
@@ -36,6 +36,7 @@ def _extract_template_graphs(meta_molecule, template_graphs={}, skip_filter=Fals
             resname = meta_molecule.nodes[node]["resname"]
             graph = meta_molecule.nodes[node]["graph"]
             graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
+            meta_molecule.nodes[node]["template"] = graph_hash
             if resname in template_graphs:
                 template_graphs[graph_hash] = graph
                 del template_graphs[resname]
diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index 7e84af88..0e93ccee 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -342,3 +342,7 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g
         graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
         templated = list(nx.get_node_attributes(unique_graphs[graph_hash], 'template').values())
         assert all(templated)
+
+    # assert that all nodes have the template attribute
+    for node in example_meta_molecule.nodes:
+        assert example_meta_molecule.nodes[node].get('template', False)

From 8f67cc0fb7149771ba95e4c68d12dadb24949152 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Wed, 17 Jul 2024 18:40:07 +0200
Subject: [PATCH 2/6] fix case where template coordinates are provided

---
 polyply/src/build_file_parser.py         | 13 ++++++++-----
 polyply/src/check_residue_equivalence.py |  8 ++++----
 polyply/tests/test_generate_templates.py |  8 ++------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/polyply/src/build_file_parser.py b/polyply/src/build_file_parser.py
index 0895f64e..ce39977c 100644
--- a/polyply/src/build_file_parser.py
+++ b/polyply/src/build_file_parser.py
@@ -152,6 +152,7 @@ def _template_atoms(self, line, lineno=0):
         node_name, atype = tokens[0], tokens[1]
         position = np.array(tokens[2:], dtype=float)
         self.current_template.add_node(node_name,
+                                       atomname=node_name,
                                        atype=atype,
                                        position=position)
 
@@ -200,14 +201,16 @@ def finalize_section(self, previous_section, ended_section):
             # if the volume is not defined yet compute the volume, this still
             # can be overwritten by an explicit volume directive later
             resname = self.current_template.name
-            if resname not in self.topology.volumes:
-                self.topology.volumes[resname] = compute_volume(self.current_template,
-                                                                coords,
-                                                                self.topology.nonbond_params,)
+            graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(self.current_template,
+                                                                                  node_attr='atomname')
+            if graph_hash not in self.topology.volumes:
+                self.topology.volumes[graph_hash] = compute_volume(self.current_template,
+                                                                   coords,
+                                                                   self.topology.nonbond_params,)
             # internally a template is defined as vectors from the
             # center of geometry
             mapped_coords = map_from_CoG(coords)
-            self.templates[resname] = mapped_coords
+            self.templates[graph_hash] = mapped_coords
             self.current_template = None
 
     def finalize(self, lineno=0):
diff --git a/polyply/src/check_residue_equivalence.py b/polyply/src/check_residue_equivalence.py
index 527f2045..57d2c9e2 100644
--- a/polyply/src/check_residue_equivalence.py
+++ b/polyply/src/check_residue_equivalence.py
@@ -62,10 +62,10 @@ def group_residues_by_hash(meta_molecule, template_graphs={}):
     dict[`:class:nx.Graph`]
         keys are the hash of the graph
     """
-    unique_graphs = {}
-    for graph in template_graphs.values():
-        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
-        unique_graphs[graph_hash] = graph
+    unique_graphs = template_graphs
+#    for graph in template_graphs.values():
+#        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
+#        unique_graphs[graph_hash] = graph
 
     for node in meta_molecule.nodes:
         graph = meta_molecule.nodes[node]["graph"]
diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index 0e93ccee..1adf2c1e 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -330,7 +330,8 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g
     for node in gen_template_graphs:
         graph = example_meta_molecule.nodes[node]['graph']
         nx.set_node_attributes(graph, True, 'template')
-        template_graphs[example_meta_molecule.nodes[node]['resname']] = graph
+        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
+        template_graphs[graph_hash] = None
 
     # perfrom the grouping
     unique_graphs = _extract_template_graphs(example_meta_molecule, template_graphs, skip_filter)
@@ -338,11 +339,6 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g
     # check the outcome
     assert len(unique_graphs) == 2
 
-    for graph in template_graphs.values():
-        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
-        templated = list(nx.get_node_attributes(unique_graphs[graph_hash], 'template').values())
-        assert all(templated)
-
     # assert that all nodes have the template attribute
     for node in example_meta_molecule.nodes:
         assert example_meta_molecule.nodes[node].get('template', False)

From 29bfbc94b0c92bdc6730795dc0960474f285edce Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 22 Jul 2024 11:10:02 +0200
Subject: [PATCH 3/6] use graph hashes consistently

---
 polyply/src/build_file_parser.py          |  9 +++++++
 polyply/src/check_residue_equivalence.py  |  4 ----
 polyply/src/generate_templates.py         | 14 ++++++-----
 polyply/tests/test_build_file_parser.py   | 29 ++++++++++++++++-------
 polyply/tests/test_load_library.py        | 15 ++++++++++--
 polyply/tests/test_residue_equivalence.py |  6 ++---
 6 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/polyply/src/build_file_parser.py b/polyply/src/build_file_parser.py
index ce39977c..3a2416fb 100644
--- a/polyply/src/build_file_parser.py
+++ b/polyply/src/build_file_parser.py
@@ -41,6 +41,7 @@ def __init__(self, molecules, topology):
         self.persistence_length = {}
         self.templates = {}
         self.current_template = None
+        self.resnames_to_hash = {}
 
     @SectionLineParser.section_parser('molecule')
     def _molecule(self, line, lineno=0):
@@ -211,6 +212,7 @@ def finalize_section(self, previous_section, ended_section):
             # center of geometry
             mapped_coords = map_from_CoG(coords)
             self.templates[graph_hash] = mapped_coords
+            self.resnames_to_hash[resname] = graph_hash
             self.current_template = None
 
     def finalize(self, lineno=0):
@@ -232,6 +234,13 @@ def finalize(self, lineno=0):
 
         super().finalize(lineno=lineno)
 
+        # for every resname with a parsed template, move a volume stored under
+        # that resname so that it is indexed by the template's graph hash
+        for resname, graph_hash in self.resnames_to_hash.items():
+            if resname in self.topology.volumes:
+                self.topology.volumes[graph_hash] = self.topology.volumes[resname]
+                del self.topology.volumes[resname]
+
     @staticmethod
     def _tag_nodes(molecule, keyword, option, molname=""):
         resids = np.arange(option['start'], option['stop'], 1.)
diff --git a/polyply/src/check_residue_equivalence.py b/polyply/src/check_residue_equivalence.py
index 57d2c9e2..279eb203 100644
--- a/polyply/src/check_residue_equivalence.py
+++ b/polyply/src/check_residue_equivalence.py
@@ -63,10 +63,6 @@ def group_residues_by_hash(meta_molecule, template_graphs={}):
         keys are the hash of the graph
     """
     unique_graphs = template_graphs
-#    for graph in template_graphs.values():
-#        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
-#        unique_graphs[graph_hash] = graph
-
     for node in meta_molecule.nodes:
         graph = meta_molecule.nodes[node]["graph"]
         graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
index 0dd06fd2..646b41a4 100644
--- a/polyply/src/generate_templates.py
+++ b/polyply/src/generate_templates.py
@@ -337,8 +337,8 @@ class variable.
         self.templates
         self.volumes
         """
-        for resname, template_graph in tqdm(template_graphs.items()):
-            if resname not in self.templates:
+        for graph_hash, template_graph in tqdm(template_graphs.items()):
+            if graph_hash not in self.templates:
                 block = extract_block(meta_molecule.molecule,
                                       template_graph,
                                       self.topology.defines)
@@ -364,13 +364,15 @@ class variable.
                         break
                     else:
                         opt_counter += 1
-
-                if resname not in self.volumes:
-                    self.volumes[resname] = compute_volume(block,
+                resname = block.nodes[list(block.nodes)[0]]['resname']
+                if resname in self.volumes:
+                    self.volumes[graph_hash] = self.volumes[resname]
+                else:
+                    self.volumes[graph_hash] = compute_volume(block,
                                                            coords,
                                                            self.topology.nonbond_params)
                 coords = map_from_CoG(coords)
-                self.templates[resname] = coords
+                self.templates[graph_hash] = coords
 
     def run_molecule(self, meta_molecule):
         """
diff --git a/polyply/tests/test_build_file_parser.py b/polyply/tests/test_build_file_parser.py
index a56ecf53..c55a657d 100644
--- a/polyply/tests/test_build_file_parser.py
+++ b/polyply/tests/test_build_file_parser.py
@@ -403,12 +403,25 @@ def test_template_volume_parsing(test_system, line, names, edges, positions, out
     polyply.src.build_file_parser.read_build_file(lines,
                                                   test_system,
                                                   test_system.molecules)
-    for mol in test_system.molecules:
-        assert len(mol.templates) == len(names)
-        for idx, name in enumerate(names):
-            template = mol.templates[name]
-            for node_pos in positions[idx]:
-                node = node_pos[0]
-                assert np.all(np.array(node_pos[1:], dtype=float) == template[node])
+    # verify that the parsed templates and volumes are indexed by graph hash
+    idx = 0
+    for name, edge_list in zip(names, edges):
+        template_graph = nx.Graph()
+        template_graph.add_edges_from(edge_list)
+        atomnames = {node: node for node in template_graph.nodes}
+        nx.set_node_attributes(template_graph, atomnames, 'atomname')
+        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(template_graph,
+                                                                              node_attr='atomname')
 
-    assert test_system.volumes == out_vol
+        mol = test_system.molecules[idx]
+        assert graph_hash in mol.templates
+
+        for node_pos in positions[idx]:
+            template = mol.templates[graph_hash]
+            node = node_pos[0]
+            assert np.all(np.array(node_pos[1:], dtype=float) == template[node])
+
+        assert graph_hash in test_system.volumes
+        assert test_system.volumes[graph_hash] == out_vol[name]
+
+        idx += 1
diff --git a/polyply/tests/test_load_library.py b/polyply/tests/test_load_library.py
index 86d2857a..d13ee7e7 100644
--- a/polyply/tests/test_load_library.py
+++ b/polyply/tests/test_load_library.py
@@ -20,6 +20,7 @@
 import pytest
 from pathlib import Path
 from contextlib import contextmanager
+import networkx as nx
 import vermouth
 from polyply import TEST_DATA
 from polyply.src.logging import LOGGER
@@ -72,6 +73,16 @@ def test_read_ff_from_files(caplog):
 
 def test_read_build_options_from_files():
 
+    # PMMA template edge_list
+    edges = [('C1', 'C2'), ('C2', 'C3'), ('C2', 'C4'),
+             ('C4', 'O1'), ('C4', 'O2'), ('O2', 'C5')]
+    g = nx.Graph()
+    g.add_edges_from(edges)
+
+    atomnames = {node: node for node in g.nodes}
+    nx.set_node_attributes(g, atomnames, 'atomname')
+    graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(g, node_attr='atomname')
+
     topfile = Path('topology_test/system.top')
     bldfile = Path('topology_test/test.bld')
     lib_name = '2016H66'
@@ -83,6 +94,6 @@ def test_read_build_options_from_files():
     load_build_files(topology, lib_name, user_files)
 
     # check if build files are parsed
-    assert topology.volumes == {'PMMA': 1.0}
+    assert topology.volumes[graph_hash] == 1.0
     molecule = topology.molecules[0]
-    assert molecule.templates
+    assert graph_hash in molecule.templates
diff --git a/polyply/tests/test_residue_equivalence.py b/polyply/tests/test_residue_equivalence.py
index 4ae276df..7ff0340a 100644
--- a/polyply/tests/test_residue_equivalence.py
+++ b/polyply/tests/test_residue_equivalence.py
@@ -28,8 +28,9 @@ def test_group_by_hash(example_meta_molecule, resnames, gen_template_graphs):
     template_graphs = {}
     for node in gen_template_graphs:
         graph = example_meta_molecule.nodes[node]['graph']
+        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
         nx.set_node_attributes(graph, True, 'template')
-        template_graphs[example_meta_molecule.nodes[node]['resname']] = graph
+        template_graphs[graph_hash] = graph
 
     # perfrom the grouping
     unique_graphs = group_residues_by_hash(example_meta_molecule, template_graphs)
@@ -37,8 +38,7 @@ def test_group_by_hash(example_meta_molecule, resnames, gen_template_graphs):
     # check the outcome
     assert len(unique_graphs) == 2
 
-    for graph in template_graphs.values():
-        graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
+    for graph_hash in template_graphs:
         templated = list(nx.get_node_attributes(unique_graphs[graph_hash], 'template').values())
         assert all(templated)
 

From 92def3ca5737431381f7a96d2b74cb5dc4342c10 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Mon, 22 Jul 2024 14:01:58 +0200
Subject: [PATCH 4/6] remove annoying tqdm

---
 polyply/src/generate_templates.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py
index 646b41a4..5bd1d69f 100644
--- a/polyply/src/generate_templates.py
+++ b/polyply/src/generate_templates.py
@@ -22,7 +22,6 @@
 from .topology import replace_defined_interaction
 from .linalg_functions import dih
 from .check_residue_equivalence import group_residues_by_hash
-from tqdm import tqdm
 """
 Processor generating coordinates for all residues of a meta_molecule
 matching those in the meta_molecule.molecule attribute.
@@ -337,7 +336,7 @@ class variable.
         self.templates
         self.volumes
         """
-        for graph_hash, template_graph in tqdm(template_graphs.items()):
+        for graph_hash, template_graph in template_graphs.items():
             if graph_hash not in self.templates:
                 block = extract_block(meta_molecule.molecule,
                                       template_graph,

From 8bb37f0eadaaa0dfa201e0cb2529c1318d516365 Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 26 Jul 2024 11:05:34 +0200
Subject: [PATCH 5/6] add additional test

---
 polyply/tests/test_generate_templates.py | 25 +++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index 1adf2c1e..daa6d4a2 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -309,17 +309,24 @@ def test_compute_volume(lines, coords, volume):
     assert np.isclose(new_vol, volume, atol=0.000001)
 
 
-@pytest.mark.parametrize('resnames, gen_template_graphs, skip_filter', (
+@pytest.mark.parametrize('resnames, gen_template_graphs, use_resname, skip_filter', (
                         # two different residues no template_graphs
-                        (['A', 'B', 'A'], [], False),
+                        (['A', 'B', 'A'], [], False, False),
                         # two different residues no template_graphs
-                        (['A', 'B', 'A'], [], True),
+                        (['A', 'B', 'A'], [], False, True),
                         # two different residues one template_graphs
-                        (['A', 'B', 'A'], [1], True),
+                        (['A', 'B', 'A'], [1], False, True),
                         # two different residues one template_graphs
-                        (['A', 'B', 'A'], [1], False),
+                        (['A', 'B', 'A'], [1], False, False),
+                        # here the template is indexed by the resname instead
+                        # of the graph hash, so the resname key has to be cleared
+                        (['A', 'B', 'A'], [1], True, True),
 ))
-def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_graphs, skip_filter):
+def test_extract_template_graphs(example_meta_molecule,
+                                 resnames,
+                                 gen_template_graphs,
+                                 use_resname,
+                                 skip_filter):
     # set the residue names
     for resname, node in zip(resnames, example_meta_molecule.nodes):
         example_meta_molecule.nodes[node]['resname'] = resname
@@ -331,7 +338,11 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g
         graph = example_meta_molecule.nodes[node]['graph']
         nx.set_node_attributes(graph, True, 'template')
         graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
-        template_graphs[graph_hash] = None
+        if use_resname:
+            resname =  example_meta_molecule.nodes[node]['resname']
+            template_graphs[resname] = None
+        else:
+            template_graphs[graph_hash] = None
 
     # perfrom the grouping
     unique_graphs = _extract_template_graphs(example_meta_molecule, template_graphs, skip_filter)

From d8d3b668445dd16e15732e6e23c507e1db5128cb Mon Sep 17 00:00:00 2001
From: Fabian Gruenewald <f.grunewald@rug.nl>
Date: Fri, 26 Jul 2024 12:11:38 +0200
Subject: [PATCH 6/6] add test for when volume but no template is provided

---
 polyply/tests/test_generate_templates.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py
index daa6d4a2..8324490b 100644
--- a/polyply/tests/test_generate_templates.py
+++ b/polyply/tests/test_generate_templates.py
@@ -153,14 +153,22 @@ def test_extract_block():
              len(ff.blocks["GLY"].interactions[inter_type]) == len(new_block.interactions[inter_type])
 
       @staticmethod
-      def test_run_molecule():
+      @pytest.mark.parametrize('volumes', (
+                               None,
+                               {"PMMA": 0.55},
+      ))
+      def test_run_molecule(volumes):
           top = polyply.src.topology.Topology.from_gmx_topfile(TEST_DATA / "topology_test" / "system.top", "test")
           top.gen_pairs()
+          if volumes:
+            top.volumes = volumes
           top.convert_nonbond_to_sig_eps()
           GenerateTemplates(topology=top, skip_filter=False, max_opt=10).run_molecule(top.molecules[0])
           graph = top.molecules[0].nodes[0]['graph']
           graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname')
           assert graph_hash in top.volumes
+          if volumes:
+            assert top.volumes[graph_hash] == volumes['PMMA']
           assert graph_hash in top.molecules[0].templates
 
       @staticmethod