diff --git a/README.md b/README.md index 015a6baef..5795255ba 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,9 @@ the following command: Note that vermouth and Martinize2, in particular development versions, may contain bugs that cause it to produce incorrect topologies. Check the produced output carefully! +Martinize2 and vermouth have [mdtraj][mdtraj] as an optional dependency, which +can be used as an alternative to dssp. + The behavior of the `pip` command can vary depending of the specificity of your python installation. See the [documentation on installing a python package][pipdoc] to learn more. @@ -108,6 +111,7 @@ academic paper is left to our sole discretion. [Martini tutorials]: http://cgmartini.nl/index.php/tutorials-general-introduction-gmx5 [Gromacs]: http://www.gromacs.org [pypi_vermouth]: https://pypi.org/project/vermouth/ +[mdtraj]: https://www.mdtraj.org/ [pipdoc]: https://packaging.python.org/tutorials/installing-packages/#installing-packages [license]: https://github.com/marrink-lab/vermouth-martinize/blob/master/LICENSE [github]: https://github.com/marrink-lab/vermouth-martinize diff --git a/bin/martinize2 b/bin/martinize2 index 2dde3aa9a..346ffb6bc 100755 --- a/bin/martinize2 +++ b/bin/martinize2 @@ -226,7 +226,7 @@ def write_gmx_topology(system, top_path, defines=(), header=()): with deferred_open("{}.itp".format(moltype), "w") as outfile: # here we format and merge all citations header[-1] = header[-1] + "\n" - header.append("Pleas cite the following papers:") + header.append("Please cite the following papers:") for citation in molecule.citations: cite_string = citation_formatter( molecule.force_field.citations[citation] @@ -520,8 +520,10 @@ def entry(): secstruct_exclusion.add_argument( "-dssp", nargs="?", - const="dssp", - help="DSSP executable for determining structure", + const=True, + help="DSSP executable for determining structure. 
If this flag is given " + "but no executable is specified, the mdtraj library will be used " + "to compute the secondary structure, if it can be imported.", ) secstruct_exclusion.add_argument( "-ss", @@ -890,8 +892,10 @@ def entry(): LOGGER.debug("Read molecule {}.", molecule, type="step") target_ff = known_force_fields[args.to_ff] - if args.dssp is not None: - AnnotateDSSP(executable=args.dssp, savedir=".").run_system(system) + if args.dssp: + if not isinstance(args.dssp, str): + args.dssp = None + AnnotateDSSP(executable=args.dssp, savedir='.').run_system(system) AnnotateMartiniSecondaryStructures().run_system(system) elif args.ss is not None: AnnotateResidues( diff --git a/requirements-tests.txt b/requirements-tests.txt index 074a03281..84b9c3f7d 100644 --- a/requirements-tests.txt +++ b/requirements-tests.txt @@ -7,3 +7,4 @@ pylint hypothesis hypothesis-networkx codecov +mdtraj diff --git a/setup.cfg b/setup.cfg index 89812702e..fb0f3fc2b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,6 +37,9 @@ install-requires = # ?? requires-dist? scipy zip-safe = False +[options.extras_require] +mdtraj = mdtraj + [build_sphinx] source-dir = doc/source build-dir = doc/build diff --git a/vermouth/citation_parser.py b/vermouth/citation_parser.py index d44231f9d..e32448053 100644 --- a/vermouth/citation_parser.py +++ b/vermouth/citation_parser.py @@ -182,7 +182,7 @@ def citation_formatter(citation, title=False): [journal] ; [doi] - Note that the formatter cannot fromat latex + Note that the formatter cannot format latex like syntax (e.g. 
a{\"} for ae) """ # first we split the author-list diff --git a/vermouth/data/force_fields/elnedyn21/citations.bib b/vermouth/data/force_fields/elnedyn21/citations.bib index 6d4c40f46..d8ea38234 100644 --- a/vermouth/data/force_fields/elnedyn21/citations.bib +++ b/vermouth/data/force_fields/elnedyn21/citations.bib @@ -52,3 +52,13 @@ @article{M2polarizable year={2010}, publisher={Public Library of Science} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/elnedyn22/citations.bib b/vermouth/data/force_fields/elnedyn22/citations.bib index 6d4c40f46..d8ea38234 100644 --- a/vermouth/data/force_fields/elnedyn22/citations.bib +++ b/vermouth/data/force_fields/elnedyn22/citations.bib @@ -52,3 +52,13 @@ @article{M2polarizable year={2010}, publisher={Public Library of Science} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. 
and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/elnedyn22p/citations.bib b/vermouth/data/force_fields/elnedyn22p/citations.bib index 6d4c40f46..d8ea38234 100644 --- a/vermouth/data/force_fields/elnedyn22p/citations.bib +++ b/vermouth/data/force_fields/elnedyn22p/citations.bib @@ -52,3 +52,13 @@ @article{M2polarizable year={2010}, publisher={Public Library of Science} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/martini22/citations.bib b/vermouth/data/force_fields/martini22/citations.bib index 6d4c40f46..d8ea38234 100644 --- a/vermouth/data/force_fields/martini22/citations.bib +++ b/vermouth/data/force_fields/martini22/citations.bib @@ -52,3 +52,13 @@ @article{M2polarizable year={2010}, publisher={Public Library of Science} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. 
and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/martini22p/citations.bib b/vermouth/data/force_fields/martini22p/citations.bib index 6d4c40f46..d8ea38234 100644 --- a/vermouth/data/force_fields/martini22p/citations.bib +++ b/vermouth/data/force_fields/martini22p/citations.bib @@ -52,3 +52,13 @@ @article{M2polarizable year={2010}, publisher={Public Library of Science} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/martini3001/citations.bib b/vermouth/data/force_fields/martini3001/citations.bib index 3799052d1..3d4f843c2 100644 --- a/vermouth/data/force_fields/martini3001/citations.bib +++ b/vermouth/data/force_fields/martini3001/citations.bib @@ -18,3 +18,13 @@ @article{Martini3smallmolecules year={2022}, publisher={Wiley Online Library} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. 
and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/martini30b32/citations.bib b/vermouth/data/force_fields/martini30b32/citations.bib index efd3f42d8..41dd25b7f 100644 --- a/vermouth/data/force_fields/martini30b32/citations.bib +++ b/vermouth/data/force_fields/martini30b32/citations.bib @@ -3,3 +3,13 @@ @misc{Martini3Beta author={Souza, Paulo C T and Marrink, Siewert Jan}, year={2020} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/data/force_fields/martini30dev/citations.bib b/vermouth/data/force_fields/martini30dev/citations.bib index efd3f42d8..41dd25b7f 100644 --- a/vermouth/data/force_fields/martini30dev/citations.bib +++ b/vermouth/data/force_fields/martini30dev/citations.bib @@ -3,3 +3,13 @@ @misc{Martini3Beta author={Souza, Paulo C T and Marrink, Siewert Jan}, year={2020} } +@article{MDTraj, + title={MDTraj: A Modern Open Library for the Analysis of Molecular Dynamics Trajectories}, + author={McGibbon, Robert T. and Beauchamp, Kyle A. and Harrigan, Matthew P. and Klein, Christoph and Swails, Jason M. and Hernández, Carlos X. and Schwantes, Christian R. and Wang, Lee-Ping and Lane, Thomas J. 
and Pande, Vijay S.}, + journal={Biophysical Journal}, + volume={109}, + number={8}, + pages={1528 -- 1532}, + year={2015}, + doi={10.1016/j.bpj.2015.08.015} +} \ No newline at end of file diff --git a/vermouth/dssp/dssp.py b/vermouth/dssp/dssp.py index fe2207599..79f89edc9 100644 --- a/vermouth/dssp/dssp.py +++ b/vermouth/dssp/dssp.py @@ -17,6 +17,7 @@ """ import collections +from functools import partial import logging import os import subprocess @@ -31,6 +32,13 @@ from .. import utils from ..log_helpers import StyleAdapter, get_logger +try: + import mdtraj +except ImportError: + HAVE_MDTRAJ = False +else: + HAVE_MDTRAJ = True + LOGGER = StyleAdapter(get_logger(__name__)) SUPPORTED_DSSP_VERSIONS = ("2.2.1", "3.0.0") @@ -145,7 +153,67 @@ def read_dssp2(lines): return secstructs -def run_dssp(system, executable='dssp', savefile=None, defer_writing=True): +def run_mdtraj(system): + """ + Compute DSSP secondary structure assignments for the system by using + ``mdtraj.compute_dssp``. + + During processing, a PDB file is produced. Therefore, all the molecules + in the system must contain the required attributes for such a file to be + generated. Also, the atom names are assumed to be compatible with the + 'charmm' force field for MDTraj to recognize them. + However, the molecules do not require the edges to be defined. + + Parameters + ---------- + system: System + The system to process + + Returns + ------- + list[str] + The assigned secondary structures as a list of one-letter codes. + The secondary structure sequences of all the molecules are combined + in a single list without delimitation. 
+ """ + tmpfile_handle, tmpfile_name = tempfile.mkstemp(suffix='.pdb', text=True, + dir='.', prefix='dssp_in_') + tmpfile_handle = os.fdopen(tmpfile_handle, mode='w') + tmpfile_handle.write(pdb.write_pdb_string(system, conect=False)) + tmpfile_handle.close() + + try: + struct = mdtraj.load_pdb(tmpfile_name) + dssp = mdtraj.compute_dssp(struct, simplified=False) + except Exception as error: + # Don't delete the temporary file + message = "MDTraj encountered an error. The message was {err}. "\ + "The input file provided to MDTraj can be found at {file}." + raise DSSPError(message.format(err=str(error), file=tmpfile_name)) from error + else: + dssp = ['C' if ss == ' ' else ss for mol in dssp for ss in mol] + if LOGGER.getEffectiveLevel() > logging.DEBUG: + os.remove(tmpfile_name) + return dssp + + +def _savefile_path(system, savedir=None): + savefile = None + if savedir is not None: + chains = set() + for molecule in system.molecules: + first_atom = list(molecule.nodes.keys())[0] + chain = molecule.nodes[first_atom].get('chain') + if chain is not None: + chains.add(chain) + if not chains: + msg = 'The "savedir" argument can only be used if chains are set.' + raise ValueError(msg) + savefile = os.path.join(savedir, 'chain_{}.ssd'.format(','.join(sorted(chains)))) + return savefile + + +def run_dssp(system, executable='dssp', savedir=None, defer_writing=True): """ Run DSSP on a system and return the assigned secondary structures. @@ -156,26 +224,24 @@ def run_dssp(system, executable='dssp', savefile=None, defer_writing=True): In order to call DSSP, a PDB file is produced. Therefore, all the molecules in the system must contain the required attributes for such a file to be generated. Also, the atom names are assumed to be compatible with the - 'universal' force field for DSSP to recognize them. + 'charmm' force field for DSSP to recognize them. However, the molecules do not require the edges to be defined. DSSP is assumed to be in version 2 or 3. 
The secondary structure codes are described in :func:`read_dssp2`. - If "savefile" is set to a path, then the output of DSSP is written in - that file. - Parameters ---------- system: System executable: str Where to find the DSSP executable. - savefile: None or str or pathlib.Path - If set to a path, the output of DSSP is written in that file. + savedir: None or str or pathlib.Path + If set to a path, the output of DSSP is written in this directory. defer_writing: bool Whether to use :meth:`~vermouth.file_writer.DeferredFileWriter.write` for writing data Returns + ------- list[str] The assigned secondary structures as a list of one-letter codes. The secondary structure sequences of all the molecules are combined @@ -193,6 +259,13 @@ def run_dssp(system, executable='dssp', savefile=None, defer_writing=True): read_dssp2 Parse a DSSP output. """ + if savedir: + # I don't love this. A system could contain multiple molecules, a mol + # could contain multiple chains. Adapting _savefile_path to iterate over + # all atoms to collect all chains sounds expensive though. + savefile = _savefile_path(system, savedir) + else: + savefile = None # check version process = subprocess.run([executable, "--version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) match = re.search('\d+\.\d+\.\d+', process.stdout.decode('UTF8')) @@ -239,22 +312,11 @@ def run_dssp(system, executable='dssp', savefile=None, defer_writing=True): open = deferred_open with open(str(savefile), 'w') as outfile: outfile.write(process.stdout) - return read_dssp2(process.stdout.split('\n')) - -def _savefile_path(molecule, savedir=None): - savefile = None - if savedir is not None: - first_atom = list(molecule.nodes.keys())[0] - chain = molecule.nodes[first_atom].get('chain') - if chain is None: - msg = 'The "savedir" argument can only be used if chains are set.' 
- raise ValueError(msg) - savefile = os.path.join(savedir, 'chain_{}.ssd'.format(chain)) - return savefile + return read_dssp2(process.stdout.split('\n')) -def annotate_dssp(molecule, executable='dssp', savedir=None, attribute='secstruct'): +def annotate_dssp(molecule, callable=None, attribute='secstruct'): """ Adds the DSSP assignation to the atoms of a molecule. @@ -279,18 +341,15 @@ def annotate_dssp(molecule, executable='dssp', savedir=None, attribute='secstruc The molecule to annotate. Its atoms must have the attributes required to write a PDB file; other atom attributes, edges, or molecule attributes are not used. - executable: str - The path or name in the research PATH of the DSSP executable. - savedir: None or str - If set to a path, the DSSP output will be written in this **directory**. - The option is only available if chains are defined with the 'chain' - atom attribute. + callable: Callable + The function to call to generate DSSP secondary structure assignments. + See also: :func:`run_dssp`, :func:`run_mdtraj` attribute: str The name of the atom attribute in which to store the annotation. 
See Also -------- - run_dssp, read_dssp2 + run_mdtraj, run_dssp, read_dssp2 """ if not is_protein(molecule): return @@ -303,11 +362,9 @@ def annotate_dssp(molecule, executable='dssp', savedir=None, attribute='secstruc if not clean_pos: return - savefile = _savefile_path(molecule, savedir) - system = System() system.add_molecule(clean_pos) - secstructs = run_dssp(system, executable, savefile) + secstructs = callable(system) annotate_residues_from_sequence(molecule, attribute, secstructs) @@ -470,13 +527,21 @@ def convert_dssp_annotation_to_martini( class AnnotateDSSP(Processor): name = 'AnnotateDSSP' - def __init__(self, executable='dssp', savedir=None): + def __init__(self, executable=None, savedir=None): super().__init__() - self.executable = executable - self.savedir = savedir + if executable is None: + if HAVE_MDTRAJ: + self.dssp = run_mdtraj + else: + self.dssp = partial(run_dssp, executable='dssp', savedir=savedir) + elif isinstance(executable, str): + self.dssp = partial(run_dssp, executable=executable, savedir=savedir) + else: + self.dssp = executable def run_molecule(self, molecule): - annotate_dssp(molecule, self.executable, self.savedir) + annotate_dssp(molecule, self.dssp) + molecule.citations.add('MDTraj') return molecule diff --git a/vermouth/processors/do_mapping.py b/vermouth/processors/do_mapping.py index 072e439d3..38b25fd18 100644 --- a/vermouth/processors/do_mapping.py +++ b/vermouth/processors/do_mapping.py @@ -557,6 +557,7 @@ def do_mapping(molecule, mappings, to_ff, attribute_keep=(), attribute_must=(), # Transferring the meta maybe should be a copy, or a deep copy... # If it breaks we look at this line. 
graph_out = Molecule(force_field=to_ff, meta=molecule.meta) + graph_out.citations.update(molecule.citations) mappings = build_graph_mapping_collection(molecule.force_field, to_ff, mappings) block_matches = [] for mapping in mappings: diff --git a/vermouth/tests/data/integration_tests/tier-0/mini-protein3_trp-cage/martinize2/command b/vermouth/tests/data/integration_tests/tier-0/mini-protein3_trp-cage/martinize2/command index 9369ebebf..b2182ed04 100644 --- a/vermouth/tests/data/integration_tests/tier-0/mini-protein3_trp-cage/martinize2/command +++ b/vermouth/tests/data/integration_tests/tier-0/mini-protein3_trp-cage/martinize2/command @@ -3,6 +3,6 @@ martinize2 -x cg.pdb -o topol.top -ff elnedyn22 --dssp dssp +-dssp -eu 0.7 -ef 800.0 diff --git a/vermouth/tests/data/integration_tests/tier-1/3i40/martinize2/command b/vermouth/tests/data/integration_tests/tier-1/3i40/martinize2/command index 355107db7..3a3f3e25d 100644 --- a/vermouth/tests/data/integration_tests/tier-1/3i40/martinize2/command +++ b/vermouth/tests/data/integration_tests/tier-1/3i40/martinize2/command @@ -1 +1 @@ -martinize2 -f ../3i40.pdb -o cg.top -x cg.pdb -dssp dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH +martinize2 -f ../3i40.pdb -o cg.top -x cg.pdb -dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH diff --git a/vermouth/tests/data/integration_tests/tier-1/6LFO_gap/martinize2/command b/vermouth/tests/data/integration_tests/tier-1/6LFO_gap/martinize2/command index f9ed622c7..a4b4c5c7c 100644 --- a/vermouth/tests/data/integration_tests/tier-1/6LFO_gap/martinize2/command +++ b/vermouth/tests/data/integration_tests/tier-1/6LFO_gap/martinize2/command @@ -1 +1 @@ -martinize2 -f ../6LFO_gap.pdb -o cg.top -x cg.pdb -dssp dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH -elastic -eunit all -resid input +martinize2 -f ../6LFO_gap.pdb -o cg.top -x cg.pdb -dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH -elastic -eunit all -resid input diff --git 
a/vermouth/tests/data/integration_tests/tier-1/EN_chain/martinize2/command b/vermouth/tests/data/integration_tests/tier-1/EN_chain/martinize2/command index a0cada1df..58b93113a 100644 --- a/vermouth/tests/data/integration_tests/tier-1/EN_chain/martinize2/command +++ b/vermouth/tests/data/integration_tests/tier-1/EN_chain/martinize2/command @@ -1 +1 @@ -martinize2 -f ../aa.pdb -o cg.top -x cg.pdb -dssp dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH -elastic -eunit chain +martinize2 -f ../aa.pdb -o cg.top -x cg.pdb -dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH -elastic -eunit chain diff --git a/vermouth/tests/data/integration_tests/tier-1/EN_region/martinize2/command b/vermouth/tests/data/integration_tests/tier-1/EN_region/martinize2/command index 0d039f134..437d789de 100644 --- a/vermouth/tests/data/integration_tests/tier-1/EN_region/martinize2/command +++ b/vermouth/tests/data/integration_tests/tier-1/EN_region/martinize2/command @@ -1 +1 @@ -martinize2 -f ../aa.pdb -o cg.top -x cg.pdb -dssp dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH -elastic -eunit 8:18 +martinize2 -f ../aa.pdb -o cg.top -x cg.pdb -dssp -p backbone -ff martini3001 -scfix -cys auto -ignore HOH -elastic -eunit 8:18 diff --git a/vermouth/tests/gmx/test_gro.py b/vermouth/tests/gmx/test_gro.py index edb620eec..17c584ab8 100644 --- a/vermouth/tests/gmx/test_gro.py +++ b/vermouth/tests/gmx/test_gro.py @@ -161,25 +161,25 @@ @pytest.fixture(params=[True, False]) -def gro_reference(request, tmpdir_factory): +def gro_reference(request, tmp_path_factory): """ Generate a GRO file and the corresponding molecule. 
""" - filename = tmpdir_factory.mktemp("data").join("tmp.gro") - with open(str(filename), 'w') as outfile: + filename = tmp_path_factory.mktemp("data") / "tmp.gro" + with open(filename, 'w') as outfile: write_ref_gro(outfile, velocities=request.param, box='10.0 11.1 12.2') molecule = build_ref_molecule(velocities=request.param) return filename, molecule @pytest.fixture(params=[43, 45]) -def gro_wrong_length(request, gro_reference, tmpdir_factory): # pylint: disable=redefined-outer-name +def gro_wrong_length(request, gro_reference, tmp_path_factory): # pylint: disable=redefined-outer-name """ Generate a GRO file with a wrong number of atoms on line 2. """ path_in, _ = gro_reference - path_out = tmpdir_factory.mktemp("data").join("wrong.gro") - with open(str(path_in)) as infile, open(str(path_out), 'w') as outfile: + path_out = tmp_path_factory.mktemp("data") / "wrong.gro" + with open(path_in) as infile, open(path_out, 'w') as outfile: outfile.write(next(infile)) outfile.write('{}\n'.format(request.param)) for line in infile: @@ -523,7 +523,7 @@ def test_filter_molecule_order(gro_reference): # pylint: disable=redefined-oute _, molecule = gro_reference filter_molecule(molecule, exclude=('SOL', ), ignh=True) keys = list(molecule.nodes) - assert sorted(keys) == keys + assert sorted(keys) == keys @pytest.mark.parametrize('exclude', ( @@ -551,14 +551,14 @@ def test_read_gro_wrong_atom_number(gro_wrong_length): # pylint: disable=redefi gro.read_gro(gro_wrong_length) -def test_write_gro(gro_reference, tmpdir): +def test_write_gro(gro_reference, tmp_path): """ Test writing GRO file. 
""" filename, molecule = gro_reference system = vermouth.System() system.molecules.append(molecule) - outname = tmpdir / 'out_test.gro' + outname = tmp_path / 'out_test.gro' gro.write_gro( system, outname, @@ -566,5 +566,5 @@ def test_write_gro(gro_reference, tmpdir): title='Just a title', ) DeferredFileWriter().write() - with open(str(filename)) as ref, open(str(outname)) as out: + with open(filename) as ref, open(outname) as out: assert out.read() == ref.read() diff --git a/vermouth/tests/gmx/test_itp.py b/vermouth/tests/gmx/test_itp.py index 6125c2d5e..c3ae1a59b 100644 --- a/vermouth/tests/gmx/test_itp.py +++ b/vermouth/tests/gmx/test_itp.py @@ -50,19 +50,19 @@ def dummy_molecule(): return molecule -def test_no_header(tmpdir, dummy_molecule): +def test_no_header(tmp_path, dummy_molecule): """ Test that no header is written if none is provided. """ - outpath = tmpdir / 'out.itp' - with open(str(outpath), 'w') as outfile: + outpath = tmp_path / 'out.itp' + with open(outpath, 'w') as outfile: write_molecule_itp(dummy_molecule, outfile) - with open(str(outpath)) as infile: + with open(outpath) as infile: assert next(infile) == '[ moleculetype ]\n' -def test_header(tmpdir, dummy_molecule): +def test_header(tmp_path, dummy_molecule): """ Test that the header is written. 
""" @@ -75,11 +75,11 @@ def test_header(tmpdir, dummy_molecule): '; It contains more than one line.\n', '\n', ) - outpath = tmpdir / 'out.itp' - with open(str(outpath), 'w') as outfile: + outpath = tmp_path / 'out.itp' + with open(outpath, 'w') as outfile: write_molecule_itp(dummy_molecule, outfile, header=header) - with open(str(outpath)) as infile: + with open(outpath) as infile: for line, expected_line in zip(infile, expected): assert line == expected_line diff --git a/vermouth/tests/test_dssp.py b/vermouth/tests/test_dssp.py index f722f7a12..89c52fe01 100644 --- a/vermouth/tests/test_dssp.py +++ b/vermouth/tests/test_dssp.py @@ -20,6 +20,7 @@ import glob import itertools +import networkx as nx import numpy as np import pytest @@ -27,13 +28,14 @@ from vermouth.file_writer import DeferredFileWriter from vermouth.forcefield import get_native_force_field from vermouth.dssp import dssp -from vermouth.dssp.dssp import DSSPError +from vermouth.dssp.dssp import DSSPError, AnnotateDSSP from vermouth.pdb.pdb import read_pdb from vermouth.tests.datafiles import ( PDB_PROTEIN, DSSP_OUTPUT, DSSP_SS_OUTPUT, PDB_ALA5_CG, + PDB_ALA5, ) DSSP_EXECUTABLE = os.environ.get("VERMOUTH_TEST_DSSP", "dssp") @@ -360,7 +362,7 @@ def test_read_dssp2(input_file, expected): @pytest.mark.parametrize("savefile", [True, False]) -def test_run_dssp(savefile, tmpdir): +def test_run_dssp(savefile, tmp_path): """ Test that :func:`vermouth.molecule.dssp.dssp.run_dssp` runs as expected and generate a save file only if requested. @@ -369,17 +371,17 @@ def test_run_dssp(savefile, tmpdir): # saving the DSSP output to file, and once with savefile set t False so we # do not generate the file. The "savefile" argument is set by # pytest.mark.parametrize. - # The "tmpdir" argument is set by pytest and is the path to a temporary + # The "tmp_path" argument is set by pytest and is the path to a temporary # directory that exists only for one iteration of the test. 
if savefile: - path = tmpdir.join("dssp_output") + path = tmp_path else: path = None system = vermouth.System() for molecule in read_pdb(str(PDB_PROTEIN)): system.add_molecule(molecule) secondary_structure = dssp.run_dssp( - system, executable=DSSP_EXECUTABLE, savefile=path + system, executable=DSSP_EXECUTABLE, savedir=path ) # Make sure we produced the expected sequence of secondary structures @@ -393,9 +395,11 @@ def test_run_dssp(savefile, tmpdir): if savefile: DeferredFileWriter().write() assert path.exists() - with open(str(path), encoding="utf-8") as genfile, open( - str(DSSP_OUTPUT), encoding="utf-8" - ) as reffile: + foundfile = list(path.glob('chain_*.ssd')) + assert len(foundfile) == 1 + foundfile = foundfile[0] + + with open(foundfile, encoding="utf-8") as genfile, open(str(DSSP_OUTPUT), encoding="utf-8") as reffile: # DSSP 3 is outputs mostly the same thing as DSSP2, though there # are some differences in non significant whitespaces, and an extra # field header. We need to normalize these differences to be able @@ -410,7 +414,7 @@ def test_run_dssp(savefile, tmpdir): assert gen == ref else: # Is the directory empty? 
- assert not os.listdir(str(tmpdir)) + assert not list(tmp_path.iterdir()) @pytest.mark.parametrize( @@ -423,7 +427,7 @@ def test_run_dssp(savefile, tmpdir): (PDB_ALA5_CG, 30, True), # WARNING ], ) -def test_run_dssp_input_file(tmpdir, caplog, pdb, loglevel, expected): +def test_run_dssp_input_file(tmp_path, caplog, pdb, loglevel, expected): """ Test that the DSSP input file is preserved (only) in the right conditions """ @@ -431,20 +435,46 @@ def test_run_dssp_input_file(tmpdir, caplog, pdb, loglevel, expected): system = vermouth.System() for molecule in read_pdb(str(pdb)): system.add_molecule(molecule) - with tmpdir.as_cwd(): - try: - dssp.run_dssp(system, executable=DSSP_EXECUTABLE) - except DSSPError: - pass - if expected: - target = 1 - else: - target = 0 - matches = glob.glob("dssp_in*.pdb") - assert len(matches) == target, matches - if matches: - # Make sure it's a valid PDB file. Mostly anyway. - list(read_pdb(matches[0])) + os.chdir(tmp_path) + try: + dssp.run_dssp(system, executable=DSSP_EXECUTABLE) + except DSSPError: + pass + if expected: + target = 1 + else: + target = 0 + matches = glob.glob("dssp_in*.pdb") + assert len(matches) == target, matches + if matches: + # Make sure it's a valid PDB file. Mostly anyway. + list(read_pdb(matches[0])) + + +@pytest.mark.parametrize('ss_struct, expected', ( + (list('ABCDE'), list('ABCDE')), + (list('AB DE'), list('ABCDE')), + ([['A'], ['B'], ['C'], ['F'], ['G']], list('ABCFG')), + ([['A'], [' '], ['E'], ['F'], [' ']], list('ACEFC')), +)) +def test_mdtraj(monkeypatch, ss_struct, expected): + # We don't want to test mdtraj.compute_dssp, so mock it. 
+ compute_dssp = lambda *_, **__: np.array(ss_struct) + monkeypatch.setattr(vermouth.dssp.dssp.mdtraj, "compute_dssp", compute_dssp) + system = vermouth.System() + for molecule in read_pdb(str(PDB_ALA5)): + system.add_molecule(molecule) + + processor = AnnotateDSSP(executable=None) + processor.run_system(system) + + found = [] + for mol in system.molecules: + residues = mol.iter_residues() + for residue in residues: + found.append(mol.nodes[residue[0]]['secstruct']) + + assert found == expected def test_cterm_atomnames(): diff --git a/vermouth/tests/test_file_writer.py b/vermouth/tests/test_file_writer.py index a384f7a46..104d1a0ae 100644 --- a/vermouth/tests/test_file_writer.py +++ b/vermouth/tests/test_file_writer.py @@ -36,12 +36,12 @@ def test_is_singleton(): ('a.txt', ['a.txt', '#a.txt.1#'], ['#a.txt.2#', '#a.txt.1#']), ('a.txt', ['a.txt', '#a.txt.2#'], ['#a.txt.1#', '#a.txt.2#']), ]) -def test_backup(tmpdir, monkeypatch, name, existing_files, expected): +def test_backup(tmp_path, monkeypatch, name, existing_files, expected): """ Ensure the DeferredFileWriter backs up existing files correctly, and at the correct moment """ - monkeypatch.chdir(tmpdir) + monkeypatch.chdir(tmp_path) for idx, file in enumerate(existing_files): with open(file, 'w') as handle: handle.write(str(idx)) @@ -61,11 +61,11 @@ def test_backup(tmpdir, monkeypatch, name, existing_files, expected): assert file.read() == str(idx) -def test_deferred_writing(tmpdir, monkeypatch): +def test_deferred_writing(tmp_path, monkeypatch): """ Ensure the DeferredFileWriter writes changes to files at the correct moment """ - monkeypatch.chdir(tmpdir) + monkeypatch.chdir(tmp_path) file_name = Path('my_file.txt') writer = DeferredFileWriter() @@ -75,14 +75,14 @@ def test_deferred_writing(tmpdir, monkeypatch): assert not file_name.exists() os.chdir('..') writer.write() - os.chdir(str(tmpdir)) + os.chdir(tmp_path) assert file_name.exists() assert file_name.read_text() == 'hello' -def test_binary_writing(tmpdir, 
monkeypatch): +def test_binary_writing(tmp_path, monkeypatch): """Ensure the DeferredFileWriter can write and append to binary files""" - monkeypatch.chdir(tmpdir) + monkeypatch.chdir(tmp_path) file_name = Path('my_file.txt') writer = DeferredFileWriter() assert not file_name.exists() @@ -92,7 +92,7 @@ def test_binary_writing(tmpdir, monkeypatch): assert not file_name.exists() os.chdir('..') writer.write() - os.chdir(str(tmpdir)) + os.chdir(tmp_path) assert file_name.exists() assert file_name.read_text() == 'Hello' @@ -104,9 +104,9 @@ def test_binary_writing(tmpdir, monkeypatch): assert file_name.read_text() == 'Hello world!' -def test_rw_plus(tmpdir, monkeypatch): +def test_rw_plus(tmp_path, monkeypatch): """Ensure the DeferredFileWriter can deal with mode r+""" - monkeypatch.chdir(tmpdir) + monkeypatch.chdir(tmp_path) path = Path('file.txt') path.write_text('123') writer = DeferredFileWriter() @@ -134,9 +134,9 @@ def test_mode_errors(mode, exception): writer.open('somefile.txt', mode) -def test_append(tmpdir, monkeypatch): +def test_append(tmp_path, monkeypatch): """Ensure the DeferredFileWriter can append""" - monkeypatch.chdir(tmpdir) + monkeypatch.chdir(tmp_path) path = Path('file.txt') path.write_text('123') @@ -151,34 +151,34 @@ def test_append(tmpdir, monkeypatch): assert path.read_text() == '123abc' -def test_closing(tmpdir, monkeypatch): +def test_closing(tmp_path, monkeypatch): """ Ensure the DeferredFileWriter's close method doesn't prompt writing and removes any temporary files. 
""" - monkeypatch.chdir(tmpdir) - tmpdir = Path(str(tmpdir)) + monkeypatch.chdir(tmp_path) + tmp_path = tmp_path writer = DeferredFileWriter() - monkeypatch.setattr(writer, '_tmpdir', str(tmpdir)) + monkeypatch.setattr(writer, '_tmpdir', str(tmp_path)) - assert not [p.name for p in tmpdir.iterdir()] + assert not [p.name for p in tmp_path.iterdir()] with writer.open('file.txt', 'w') as file: file.write('abc') writer.write() - assert [p.name for p in tmpdir.iterdir()] == ['file.txt'] + assert [p.name for p in tmp_path.iterdir()] == ['file.txt'] with writer.open('file2.txt', 'w') as file: file.write('abc') writer.close() - assert [p.name for p in tmpdir.iterdir()] == ['file.txt'] + assert [p.name for p in tmp_path.iterdir()] == ['file.txt'] -def test_reopen(tmpdir, monkeypatch): - monkeypatch.chdir(tmpdir) +def test_reopen(tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) path = Path('file.txt') writer = DeferredFileWriter() diff --git a/vermouth/tests/test_forcefield.py b/vermouth/tests/test_forcefield.py index 78d2bb925..467647074 100644 --- a/vermouth/tests/test_forcefield.py +++ b/vermouth/tests/test_forcefield.py @@ -99,14 +99,15 @@ def test_has_feature_false(force_field_with_features): assert not force_field_with_features.has_feature('absent') -def test_create_ff_from_dir_name(tmpdir): +def test_create_ff_from_dir_name(tmp_path): """ Creates a force field from a directory AND a name, assure that the correct name is used. The name provided with the 'name' argument should be used. """ - directory = tmpdir.mkdir('dirname') + directory = tmp_path / 'dirname' + directory.mkdir() name = 'the_name' ff = vermouth.forcefield.ForceField( directory=str(directory), @@ -126,13 +127,15 @@ def test_create_ff_from_name(): @pytest.mark.parametrize('path_type', (str, pathlib.Path)) -def test_create_ff_from_dir(tmpdir, path_type): +def test_create_ff_from_dir(tmp_path, path_type): """ Creates a force field from a directory, assure that the name is correct. 
""" ff_name = 'name' - # Depending on the version of python, the type of tmpdir may differ. - directory = path_type(str(tmpdir.mkdir(ff_name))) + # Depending on the version of python, the type of tmp_path may differ. + directory = tmp_path / ff_name + directory.mkdir() + directory = path_type(directory) ff = vermouth.forcefield.ForceField(directory=directory) assert ff.name == ff_name diff --git a/vermouth/tests/test_map_input.py b/vermouth/tests/test_map_input.py index 87a3fe337..c21847450 100644 --- a/vermouth/tests/test_map_input.py +++ b/vermouth/tests/test_map_input.py @@ -406,12 +406,13 @@ def test_read_mapping_file_multiple(reference_multi): @pytest.fixture(scope='session') -def ref_mapping_directory(tmpdir_factory): +def ref_mapping_directory(tmp_path_factory): """ Build a file tree with mapping files. """ - basedir = tmpdir_factory.mktemp('data') - mapdir = basedir.mkdir('mappings') + basedir = tmp_path_factory.mktemp('data') + mapdir = basedir / 'mappings' + mapdir.mkdir() template = textwrap.dedent(""" [ molecule ] @@ -436,7 +437,7 @@ def ref_mapping_directory(tmpdir_factory): iterate_on = itertools.product(force_fields_from, force_fields_to, range(3)) for idx, (from_ff, to_ff, _) in enumerate(iterate_on): mapfile = mapdir / 'file{}.map'.format(idx) - with open(str(mapfile), 'w') as outfile: + with open(mapfile, 'w') as outfile: outfile.write(template.format(idx, from_ff, to_ff)) mapping = { @@ -495,20 +496,22 @@ def test_read_mapping_directory_not_dir(): vermouth.map_input.read_mapping_directory('not a directory', {}) -def test_read_mapping_directory_error(tmpdir): +def test_read_mapping_directory_error(tmp_path): """ Test that :func:`vermouth.map_input.read_mapping_directory` raises an exception when a file could not be read. 
""" - mapdir = Path(str(tmpdir.mkdir('mappings'))) - with open(str(mapdir / 'valid.backmap'), 'w') as outfile: + mapdir = tmp_path / 'mappings' + mapdir.mkdir() + + with open(mapdir / 'valid.backmap', 'w') as outfile: outfile.write(textwrap.dedent(""" [ molecule ] valid [ atoms ] 0 A B """)) - with open(str(mapdir / 'not_valid.map'), 'w') as outfile: + with open(mapdir / 'not_valid.map', 'w') as outfile: outfile.write('invalid content') with pytest.raises(IOError): vermouth.map_input.read_mapping_directory(mapdir, {}) diff --git a/vermouth/tests/test_name_moltype.py b/vermouth/tests/test_name_moltype.py index f962328cc..7f93670ce 100644 --- a/vermouth/tests/test_name_moltype.py +++ b/vermouth/tests/test_name_moltype.py @@ -138,7 +138,7 @@ def test_name_moltype(mols_and_moltypes, deduplicate): @pytest.mark.parametrize('deduplicate', (True, False)) -def test_martinize2_moltypes(tmpdir, deduplicate): +def test_martinize2_moltypes(tmp_path, deduplicate): """ Run martinize2 and make sure the ITP file produced have the expected names. """ @@ -160,8 +160,8 @@ def test_martinize2_moltypes(tmpdir, deduplicate): n_outputs = 4 expected = ['molecule_{}.itp'.format(i) for i in range(n_outputs)] - proc = subprocess.run(command, cwd=str(tmpdir), timeout=90, check=False) + proc = subprocess.run(command, cwd=tmp_path, timeout=90, check=False) assert proc.returncode == 0 - itp_files = sorted(os.path.basename(fname) for fname in glob(str(tmpdir / '*.itp'))) + itp_files = sorted(os.path.basename(fname) for fname in tmp_path.glob('*.itp')) assert itp_files == expected