From abebd386395b3d61956da4ea5a4d546e7299fac6 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Tue, 16 Jul 2024 21:56:57 +0200 Subject: [PATCH 1/6] make sure graph hash is always annotated --- polyply/src/generate_templates.py | 1 + polyply/tests/test_generate_templates.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py index ca7df079..0dd06fd2 100644 --- a/polyply/src/generate_templates.py +++ b/polyply/src/generate_templates.py @@ -36,6 +36,7 @@ def _extract_template_graphs(meta_molecule, template_graphs={}, skip_filter=Fals resname = meta_molecule.nodes[node]["resname"] graph = meta_molecule.nodes[node]["graph"] graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') + meta_molecule.nodes[node]["template"] = graph_hash if resname in template_graphs: template_graphs[graph_hash] = graph del template_graphs[resname] diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index 7e84af88..0e93ccee 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -342,3 +342,7 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') templated = list(nx.get_node_attributes(unique_graphs[graph_hash], 'template').values()) assert all(templated) + + # assert that all nodes have the template attribute + for node in example_meta_molecule.nodes: + assert example_meta_molecule.nodes[node].get('template', False) From 8f67cc0fb7149771ba95e4c68d12dadb24949152 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Wed, 17 Jul 2024 18:40:07 +0200 Subject: [PATCH 2/6] fix case where template coordinates are provided --- polyply/src/build_file_parser.py | 13 ++++++++----- polyply/src/check_residue_equivalence.py | 8 ++++---- polyply/tests/test_generate_templates.py | 8 ++------ 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/polyply/src/build_file_parser.py b/polyply/src/build_file_parser.py index 0895f64e..ce39977c 100644 --- a/polyply/src/build_file_parser.py +++ b/polyply/src/build_file_parser.py @@ -152,6 +152,7 @@ def _template_atoms(self, line, lineno=0): node_name, atype = tokens[0], tokens[1] position = np.array(tokens[2:], dtype=float) self.current_template.add_node(node_name, + atomname=node_name, atype=atype, position=position) @@ -200,14 +201,16 @@ def finalize_section(self, previous_section, ended_section): # if the volume is not defined yet compute the volume, this still # can be overwritten by an explicit volume directive later resname = self.current_template.name - if resname not in self.topology.volumes: - self.topology.volumes[resname] = compute_volume(self.current_template, - coords, - self.topology.nonbond_params,) + graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(self.current_template, + node_attr='atomname') + if graph_hash not in self.topology.volumes: + self.topology.volumes[graph_hash] = compute_volume(self.current_template, + coords, + self.topology.nonbond_params,) # internally a template is defined as vectors from the # center of geometry mapped_coords = map_from_CoG(coords) - self.templates[resname] = mapped_coords + self.templates[graph_hash] = mapped_coords self.current_template = None def finalize(self, lineno=0): diff --git a/polyply/src/check_residue_equivalence.py b/polyply/src/check_residue_equivalence.py index 527f2045..57d2c9e2 100644 --- a/polyply/src/check_residue_equivalence.py +++ b/polyply/src/check_residue_equivalence.py @@ -62,10 +62,10 @@ def group_residues_by_hash(meta_molecule, template_graphs={}): dict[`:class:nx.Graph`] keys are the hash of the graph """ - unique_graphs = {} - for graph in template_graphs.values(): - graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') - unique_graphs[graph_hash] = graph + unique_graphs = template_graphs +# for graph in template_graphs.values(): +# graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') +# unique_graphs[graph_hash] = graph for node in meta_molecule.nodes: graph = meta_molecule.nodes[node]["graph"] diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index 0e93ccee..1adf2c1e 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -330,7 +330,8 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g for node in gen_template_graphs: graph = example_meta_molecule.nodes[node]['graph'] nx.set_node_attributes(graph, True, 'template') - template_graphs[example_meta_molecule.nodes[node]['resname']] = graph + graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') + template_graphs[graph_hash] = None # perfrom the grouping unique_graphs = _extract_template_graphs(example_meta_molecule, template_graphs, skip_filter) @@ -338,11 +339,6 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g # check the outcome assert len(unique_graphs) == 2 - for graph in template_graphs.values(): - graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') - templated = list(nx.get_node_attributes(unique_graphs[graph_hash], 'template').values()) - assert all(templated) - # assert that all nodes have the template attribute for node in example_meta_molecule.nodes: assert example_meta_molecule.nodes[node].get('template', False) From 29bfbc94b0c92bdc6730795dc0960474f285edce Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 22 Jul 2024 11:10:02 +0200 Subject: [PATCH 3/6] use graph hashes consistently --- polyply/src/build_file_parser.py | 9 +++++++ polyply/src/check_residue_equivalence.py | 4 ---- polyply/src/generate_templates.py | 14 ++++++----- polyply/tests/test_build_file_parser.py | 29 ++++++++++++++++------- polyply/tests/test_load_library.py | 15 ++++++++++-- polyply/tests/test_residue_equivalence.py | 6 ++--- 6 files changed, 54 insertions(+), 23 deletions(-) diff --git a/polyply/src/build_file_parser.py b/polyply/src/build_file_parser.py index ce39977c..3a2416fb 100644 --- a/polyply/src/build_file_parser.py +++ b/polyply/src/build_file_parser.py @@ -41,6 +41,7 @@ def __init__(self, molecules, topology): self.persistence_length = {} self.templates = {} self.current_template = None + self.resnames_to_hash = {} @SectionLineParser.section_parser('molecule') def _molecule(self, line, lineno=0): @@ -211,6 +212,7 @@ def finalize_section(self, previous_section, ended_section): # center of geometry mapped_coords = map_from_CoG(coords) self.templates[graph_hash] = mapped_coords + self.resnames_to_hash[resname] = graph_hash self.current_template = None def finalize(self, lineno=0): @@ -232,6 +234,13 @@ def finalize(self, lineno=0): super().finalize(lineno=lineno) + # if template graphs and volumes are provided + # make sure that volumes are indexed by the hash + for resname, graph_hash in self.resnames_to_hash.items(): + if resname in self.topology.volumes: + self.topology.volumes[graph_hash] = self.topology.volumes[resname] + del self.topology.volumes[resname] + @staticmethod def _tag_nodes(molecule, keyword, option, molname=""): resids = np.arange(option['start'], option['stop'], 1.) diff --git a/polyply/src/check_residue_equivalence.py b/polyply/src/check_residue_equivalence.py index 57d2c9e2..279eb203 100644 --- a/polyply/src/check_residue_equivalence.py +++ b/polyply/src/check_residue_equivalence.py @@ -63,10 +63,6 @@ def group_residues_by_hash(meta_molecule, template_graphs={}): keys are the hash of the graph """ unique_graphs = template_graphs -# for graph in template_graphs.values(): -# graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') -# unique_graphs[graph_hash] = graph - for node in meta_molecule.nodes: graph = meta_molecule.nodes[node]["graph"] graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py index 0dd06fd2..646b41a4 100644 --- a/polyply/src/generate_templates.py +++ b/polyply/src/generate_templates.py @@ -337,8 +337,8 @@ class variable. self.templates self.volumes """ - for resname, template_graph in tqdm(template_graphs.items()): - if resname not in self.templates: + for graph_hash, template_graph in tqdm(template_graphs.items()): + if graph_hash not in self.templates: block = extract_block(meta_molecule.molecule, template_graph, self.topology.defines) @@ -364,13 +364,15 @@ class variable. break else: opt_counter += 1 - - if resname not in self.volumes: - self.volumes[resname] = compute_volume(block, + resname = block.nodes[list(block.nodes)[0]]['resname'] + if resname in self.volumes: + self.volumes[graph_hash] = self.volumes[resname] + else: + self.volumes[graph_hash] = compute_volume(block, coords, self.topology.nonbond_params) coords = map_from_CoG(coords) - self.templates[resname] = coords + self.templates[graph_hash] = coords def run_molecule(self, meta_molecule): """ diff --git a/polyply/tests/test_build_file_parser.py b/polyply/tests/test_build_file_parser.py index a56ecf53..c55a657d 100644 --- a/polyply/tests/test_build_file_parser.py +++ b/polyply/tests/test_build_file_parser.py @@ -403,12 +403,25 @@ def test_template_volume_parsing(test_system, line, names, edges, positions, out polyply.src.build_file_parser.read_build_file(lines, test_system, test_system.molecules) - for mol in test_system.molecules: - assert len(mol.templates) == len(names) - for idx, name in enumerate(names): - template = mol.templates[name] - for node_pos in positions[idx]: - node = node_pos[0] - assert np.all(np.array(node_pos[1:], dtype=float) == template[node]) + # verify results parsing based on hashes + idx = 0 + for name, edge_list in zip(names, edges): + template_graph = nx.Graph() + template_graph.add_edges_from(edge_list) + atomnames = {node: node for node in template_graph.nodes} + nx.set_node_attributes(template_graph, atomnames, 'atomname') + graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(template_graph, + node_attr='atomname') - assert test_system.volumes == out_vol + mol = test_system.molecules[idx] + assert graph_hash in mol.templates + + for node_pos in positions[idx]: + template = mol.templates[graph_hash] + node = node_pos[0] + assert np.all(np.array(node_pos[1:], dtype=float) == template[node]) + + assert graph_hash in test_system.volumes + assert test_system.volumes[graph_hash] == out_vol[name] + + idx += 1 diff --git a/polyply/tests/test_load_library.py b/polyply/tests/test_load_library.py index 86d2857a..d13ee7e7 100644 --- a/polyply/tests/test_load_library.py +++ b/polyply/tests/test_load_library.py @@ -20,6 +20,7 @@ import pytest from pathlib import Path from contextlib import contextmanager +import networkx as nx import vermouth from polyply import TEST_DATA from polyply.src.logging import LOGGER @@ -72,6 +73,16 @@ def test_read_ff_from_files(caplog): def test_read_build_options_from_files(): + # PMMA template edge_list + edges = [('C1', 'C2'), ('C2', 'C3'), ('C2', 'C4'), + ('C4', 'O1'), ('C4', 'O2'), ('O2', 'C5')] + g = nx.Graph() + g.add_edges_from(edges) + + atomnames = {node: node for node in g.nodes} + nx.set_node_attributes(g, atomnames, 'atomname') + graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(g, node_attr='atomname') + topfile = Path('topology_test/system.top') bldfile = Path('topology_test/test.bld') lib_name = '2016H66' @@ -83,6 +94,6 @@ def test_read_build_options_from_files(): load_build_files(topology, lib_name, user_files) # check if build files are parsed - assert topology.volumes == {'PMMA': 1.0} + assert topology.volumes[graph_hash] == 1.0 molecule = topology.molecules[0] - assert molecule.templates + assert graph_hash in molecule.templates diff --git a/polyply/tests/test_residue_equivalence.py b/polyply/tests/test_residue_equivalence.py index 4ae276df..7ff0340a 100644 --- a/polyply/tests/test_residue_equivalence.py +++ b/polyply/tests/test_residue_equivalence.py @@ -28,8 +28,9 @@ def test_group_by_hash(example_meta_molecule, resnames, gen_template_graphs): template_graphs = {} for node in gen_template_graphs: graph = example_meta_molecule.nodes[node]['graph'] + graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') nx.set_node_attributes(graph, True, 'template') - template_graphs[example_meta_molecule.nodes[node]['resname']] = graph + template_graphs[graph_hash] = graph # perfrom the grouping unique_graphs = group_residues_by_hash(example_meta_molecule, template_graphs) @@ -37,8 +38,7 @@ def test_group_by_hash(example_meta_molecule, resnames, gen_template_graphs): # check the outcome assert len(unique_graphs) == 2 - for graph in template_graphs.values(): - graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') + for graph_hash in template_graphs: templated = list(nx.get_node_attributes(unique_graphs[graph_hash], 'template').values()) assert all(templated) From 92def3ca5737431381f7a96d2b74cb5dc4342c10 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Mon, 22 Jul 2024 14:01:58 +0200 Subject: [PATCH 4/6] remove annoying tqdm --- polyply/src/generate_templates.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/polyply/src/generate_templates.py b/polyply/src/generate_templates.py index 646b41a4..5bd1d69f 100644 --- a/polyply/src/generate_templates.py +++ b/polyply/src/generate_templates.py @@ -22,7 +22,6 @@ from .topology import replace_defined_interaction from .linalg_functions import dih from .check_residue_equivalence import group_residues_by_hash -from tqdm import tqdm """ Processor generating coordinates for all residues of a meta_molecule matching those in the meta_molecule.molecule attribute. @@ -337,7 +336,7 @@ class variable. self.templates self.volumes """ - for graph_hash, template_graph in tqdm(template_graphs.items()): + for graph_hash, template_graph in template_graphs.items(): if graph_hash not in self.templates: block = extract_block(meta_molecule.molecule, template_graph, From 8bb37f0eadaaa0dfa201e0cb2529c1318d516365 Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 26 Jul 2024 11:05:34 +0200 Subject: [PATCH 5/6] add additional test --- polyply/tests/test_generate_templates.py | 25 +++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index 1adf2c1e..daa6d4a2 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -309,17 +309,24 @@ def test_compute_volume(lines, coords, volume): assert np.isclose(new_vol, volume, atol=0.000001) -@pytest.mark.parametrize('resnames, gen_template_graphs, skip_filter', ( +@pytest.mark.parametrize('resnames, gen_template_graphs, use_resname, skip_filter', ( # two different residues no template_graphs - (['A', 'B', 'A'], [], False), + (['A', 'B', 'A'], [], False, False), # two different residues no template_graphs - (['A', 'B', 'A'], [], True), + (['A', 'B', 'A'], [], False, True), # two different residues one template_graphs - (['A', 'B', 'A'], [1], True), + (['A', 'B', 'A'], [1], False, True), # two different residues one template_graphs - (['A', 'B', 'A'], [1], False), + (['A', 'B', 'A'], [1], False, False), + # here the template is indexed with the resname + # instead of the hash which needs to be cleared + (['A', 'B', 'A'], [1], True, True), )) -def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_graphs, skip_filter): +def test_extract_template_graphs(example_meta_molecule, + resnames, + gen_template_graphs, + use_resname, + skip_filter): # set the residue names for resname, node in zip(resnames, example_meta_molecule.nodes): example_meta_molecule.nodes[node]['resname'] = resname @@ -331,7 +338,11 @@ def test_extract_template_graphs(example_meta_molecule, resnames, gen_template_g graph = example_meta_molecule.nodes[node]['graph'] nx.set_node_attributes(graph, True, 'template') graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') - template_graphs[graph_hash] = None + if use_resname: + resname = example_meta_molecule.nodes[node]['resname'] + template_graphs[resname] = None + else: + template_graphs[graph_hash] = None # perfrom the grouping unique_graphs = _extract_template_graphs(example_meta_molecule, template_graphs, skip_filter) From d8d3b668445dd16e15732e6e23c507e1db5128cb Mon Sep 17 00:00:00 2001 From: Fabian Gruenewald Date: Fri, 26 Jul 2024 12:11:38 +0200 Subject: [PATCH 6/6] add test for when volume but no template is provided --- polyply/tests/test_generate_templates.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/polyply/tests/test_generate_templates.py b/polyply/tests/test_generate_templates.py index daa6d4a2..8324490b 100644 --- a/polyply/tests/test_generate_templates.py +++ b/polyply/tests/test_generate_templates.py @@ -153,14 +153,22 @@ def test_extract_block(): len(ff.blocks["GLY"].interactions[inter_type]) == len(new_block.interactions[inter_type]) @staticmethod - def test_run_molecule(): + @pytest.mark.parametrize('volumes', ( + None, + {"PMMA": 0.55}, + )) + def test_run_molecule(volumes): top = polyply.src.topology.Topology.from_gmx_topfile(TEST_DATA / "topology_test" / "system.top", "test") top.gen_pairs() + if volumes: + top.volumes = volumes top.convert_nonbond_to_sig_eps() GenerateTemplates(topology=top, skip_filter=False, max_opt=10).run_molecule(top.molecules[0]) graph = top.molecules[0].nodes[0]['graph'] graph_hash = nx.algorithms.graph_hashing.weisfeiler_lehman_graph_hash(graph, node_attr='atomname') assert graph_hash in top.volumes + if volumes: + assert top.volumes[graph_hash] == volumes['PMMA'] assert graph_hash in top.molecules[0].templates @staticmethod