diff --git a/Manual.html b/Manual.html index bdd10f0..41efd33 100644 --- a/Manual.html +++ b/Manual.html @@ -281,5 +281,34 @@

Examples

fixer.replaceNonstandardResidues() +

Nonstandard Residues

+ +PDBFixer knows how to build all the standard amino acids and nucleotides. If you want to apply it to other residues, you need to tell it how. This is done by defining templates. + +A template is a description of a residue: what atoms it contains, how they are bonded to each other, and what conformation they take on. For residues defined in the PDB Chemical Component Dictionary, you can simply tell PDBFixer to download the definition of whatever residue you need. For example, if you want to model a protein containing a selenocysteine (PDB code SEC), call + +
+fixer.downloadTemplate('SEC')
+
+ +Now you can call other methods such as findMissingAtoms(), addMissingAtoms(), and addMissingHydrogens() in the usual way. PDBFixer will fix problems in the SEC residue exactly as it does for standard ones. +

+If your structure contains molecules or residues that are not present in the PDB Chemical Component Dictionary, you can still add templates for them, but you need to define them yourself. This is done by calling registerTemplate(). It takes two required arguments: an OpenMM Topology object describing the structure of the residue, and a list of positions describing a typical conformation of the residue. These can be easily obtained from, for example, a PDB or PDBx/mmCIF file containing just the single residue. + +

+pdbx = PDBxFile('MOL.cif')
+fixer.registerTemplate(pdbx.topology, pdbx.positions)
+
+ +For residues that appear as part of larger molecules rather than in isolation, you need to provide one more piece of information: which atoms should only be present in terminal residues. For example, a C-terminal amino acid is capped with OXT and HXT atoms, but these atoms are omitted when the residue appears in the middle of a chain and is bonded to another residue. Likewise, the nitrogen in an N-terminal amino acid has two hydrogens attached to it (H and H2), but when it appears in the middle of a chain and the nitrogen is bonded to another residue, the H2 hydrogen is omitted. You can pass a third argument containing a list of boolean flags for each atom, specifying which ones should be omitted from non-terminal residues. + +
+pdbx = PDBxFile('RES.cif')
+terminal = [atom.name in ('H2', 'OXT', 'HXT') for atom in pdbx.topology.atoms()]
+fixer.registerTemplate(pdbx.topology, pdbx.positions, terminal)
+
+ +Strictly speaking, it is the bonds that matter, not the position in the chain. For example, the sulfur in a CYS residue has a hydrogen that must be omitted when it forms a disulfide bond to another residue. What matters is whether the sulfur has a bond connecting it to another residue, not the position of the CYS residue in its chain. + diff --git a/pdbfixer/pdbfixer.py b/pdbfixer/pdbfixer.py index d958e57..534ce54 100644 --- a/pdbfixer/pdbfixer.py +++ b/pdbfixer/pdbfixer.py @@ -6,7 +6,7 @@ Biological Structures at Stanford, funded under the NIH Roadmap for Medical Research, grant U54 GM072970. See https://simtk.org. -Portions copyright (c) 2013-2023 Stanford University and the Authors. +Portions copyright (c) 2013-2024 Stanford University and the Authors. Authors: Peter Eastman Contributors: @@ -54,6 +54,7 @@ import os import os.path import math +from collections import defaultdict from pkg_resources import resource_filename @@ -98,6 +99,15 @@ def __init__(self, chainId, number, residueName, standardName): self.residueName = residueName self.standardName = standardName +class Template: + """Template represents a standard residue, or a nonstandard one registered with registerTemplate().""" + def __init__(self, topology, positions, terminal=None): + self.topology = topology + self.positions = positions + if terminal is None: + terminal = [False]*topology.getNumAtoms() + self.terminal = terminal + def _guessFileFormat(file, filename): """Guess whether a file is PDB or PDBx/mmCIF based on its filename and contents.""" filename = filename.lower() @@ -276,7 +286,7 @@ def __init__(self, filename=None, pdbfile=None, pdbxfile=None, url=None, pdbid=N for file in os.listdir(templatesPath): templatePdb = app.PDBFile(os.path.join(templatesPath, file)) name = next(templatePdb.topology.residues()).name - self.templates[name] = templatePdb + self.templates[name] = Template(templatePdb.topology, templatePdb.positions) def _initializeFromPDB(self, file): """Initialize this object by reading a PDB file.""" @@ -344,6 +354,96 @@ def _initializeFromPDBx(self, file): for row in modData.getRowList(): self.modifiedResidues.append(ModifiedResidue(row[asymIdCol], int(row[resNumCol]), row[resNameCol], row[standardResCol])) + def _getTemplate(self, name): + """Return the template with a name. If none has been registered, this will return None.""" + if name in self.templates: + return self.templates[name] + return None + + def registerTemplate(self, topology, positions, terminal=None): + """Register a template for a nonstandard residue. This allows PDBFixer to add missing residues of this type, + to add missing atoms to existing residues, and to mutate other residues to it. + Parameters + ---------- + topology: openmm.app.Topology + A Topology containing a single chain with a single residue, describing the nonstandard residue + being registered. + positions: array of shape (n_atoms, 3) + The positions of the atoms in the residue in a typical conformation. These positions are used + when adding missing atoms or residues. + terminal: optional list of bool + If this is present, it should be a list of length equal to the number of atoms in the residue. + If an element is True, that indicates the corresponding atom should only be added to terminal + residues. + """ + residues = list(topology.residues()) + if len(residues) != 1: + raise ValueError('The Topology must contain a single residue') + if topology.getNumAtoms() != len(positions): + raise ValueError('The number of positions does not match the number of atoms in the Topology') + if terminal is not None and len(terminal) != topology.getNumAtoms(): + raise ValueError('The number of terminal flags does not match the number of atoms in the Topology') + self.templates[residues[0].name] = Template(topology, positions, terminal) + + def downloadTemplate(self, name): + """Attempt to download a residue definition from the PDB and register a template for it. + + Parameters + ---------- + name: str + The name of the residue, as specified in the PDB Chemical Component Dictionary. + + Returns + ------- + True if a template was successfully registered, false otherwise. + """ + name = name.upper() + try: + file = urlopen(f'https://files.rcsb.org/ligands/download/{name}.cif') + contents = file.read().decode('utf-8') + file.close() + except: + return False + + # Load the atoms. + + from openmm.app.internal.pdbx.reader.PdbxReader import PdbxReader + reader = PdbxReader(StringIO(contents)) + data = [] + reader.read(data) + block = data[0] + atomData = block.getObj('chem_comp_atom') + atomNameCol = atomData.getAttributeIndex('atom_id') + symbolCol = atomData.getAttributeIndex('type_symbol') + leavingCol = atomData.getAttributeIndex('pdbx_leaving_atom_flag') + xCol = atomData.getAttributeIndex('pdbx_model_Cartn_x_ideal') + yCol = atomData.getAttributeIndex('pdbx_model_Cartn_y_ideal') + zCol = atomData.getAttributeIndex('pdbx_model_Cartn_z_ideal') + topology = app.Topology() + chain = topology.addChain() + residue = topology.addResidue(name, chain) + positions = [] + atomByName = {} + terminal = [] + for row in atomData.getRowList(): + atomName = row[atomNameCol] + atom = topology.addAtom(atomName, app.Element.getBySymbol(row[symbolCol]), residue) + atomByName[atomName] = atom + terminal.append(row[leavingCol] == 'Y') + positions.append(mm.Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1) + positions = positions*unit.nanometers + + # Load the bonds. + + bondData = block.getObj('chem_comp_bond') + if bondData is not None: + atom1Col = bondData.getAttributeIndex('atom_id_1') + atom2Col = bondData.getAttributeIndex('atom_id_2') + for row in bondData.getRowList(): + topology.addBond(atomByName[row[atom1Col]], atomByName[row[atom2Col]]) + self.registerTemplate(topology, positions, terminal) + return True + def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): """Create a new Topology in which missing atoms have been added. @@ -375,7 +475,7 @@ def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): addedOXT = [] residueCenters = [self._computeResidueCenter(res).value_in_unit(unit.nanometers) for res in self.topology.residues()]*unit.nanometers for chain in self.topology.chains(): - if omitUnknownMolecules and not any(residue.name in self.templates for residue in chain.residues()): + if omitUnknownMolecules and all(self._getTemplate(residue.name) is None for residue in chain.residues()): continue chainResidues = list(chain.residues()) newChain = newTopology.addChain(chain.id) @@ -413,7 +513,7 @@ def _addAtomsToTopology(self, heavyAtomsOnly, omitUnknownMolecules): # Find corresponding atoms in the residue and the template. - template = self.templates[residue.name] + template = self._getTemplate(residue.name) atomPositions = dict((atom.name, self.positions[atom.index]) for atom in residue.atoms()) points1 = [] points2 = [] @@ -508,7 +608,7 @@ def _addMissingResiduesToChain(self, chain, residueNames, startPosition, endPosi # Add the residues. for i, residueName in enumerate(residueNames): - template = self.templates[residueName] + template = self._getTemplate(residueName) # Find a translation that best matches the adjacent residue. @@ -703,7 +803,7 @@ def findNonstandardResidues(self): replacement = modres[key] if replacement == 'DU': replacement = 'DT' - if replacement in self.templates: + if self._getTemplate(replacement) != None: nonstandard[residue] = replacement self.nonstandardResidues = [(r, nonstandard[r]) for r in sorted(nonstandard, key=lambda r: r.index)] @@ -731,7 +831,7 @@ def replaceNonstandardResidues(self): for residue, replaceWith in self.nonstandardResidues: residue.name = replaceWith - template = self.templates[replaceWith] + template = self._getTemplate(replaceWith) standardAtoms = set(atom.name for atom in template.topology.atoms()) for atom in residue.atoms(): if atom.element in (None, hydrogen) or atom.name not in standardAtoms: @@ -760,6 +860,10 @@ def applyMutations(self, mutations, chain_id): Notes ----- + If a target residue is not a standard amino acid, and if no template + has been registered for it with registerTemplate(), this function + attempts to look it up from the PDB and create a new template for it. + We require three letter codes to avoid possible ambiguitities. We can't guarantee that the resulting model is a good one; for significant changes in sequence, you should probably be using @@ -800,10 +904,11 @@ def applyMutations(self, mutations, chain_id): if residue.name != old_name: raise(ValueError("You asked to mutate chain %s residue %d name %s, but that residue is actually %s!" % (chain_id, resSeq, old_name, residue.name))) - try: - template = self.templates[new_name] - except KeyError: - raise(KeyError("Cannot find residue %s in template library!" % new_name)) + if self._getTemplate(new_name) is None: + # Try to download a template from the PDB. + self.downloadTemplate(new_name) + if self._getTemplate(new_name) is None: + raise(KeyError("Cannot find residue %s in template library!" % new_name)) # Store mutation residue_map[residue] = new_name @@ -816,7 +921,7 @@ def applyMutations(self, mutations, chain_id): for residue in residue_map.keys(): replaceWith = residue_map[residue] residue.name = replaceWith - template = self.templates[replaceWith] + template = self._getTemplate(replaceWith) standardAtoms = set(atom.name for atom in template.topology.atoms()) for atom in residue.atoms(): if atom.element in (None, hydrogen) or atom.name not in standardAtoms: @@ -858,23 +963,60 @@ def findMissingAtoms(self): missingAtoms = {} missingTerminals = {} + # Determine which atoms have an external bond to another residue. + + hasExternal = defaultdict(bool) + for atom1, atom2 in self.topology.bonds(): + if atom1.residue != atom2.residue: + hasExternal[(atom1.residue, atom1.name)] = True + hasExternal[(atom2.residue, atom2.name)] = True + for chain in self.topology.chains(): + chainResidues = list(chain.residues()) + for residue in chain.residues(): + atomNames = [atom.name for atom in residue.atoms()] + if all(name in atomNames for name in ['C', 'O', 'CA']): + # We'll be adding peptide bonds. + if residue != chainResidues[0]: + hasExternal[(residue, 'N')] = True + if residue != chainResidues[-1]: + hasExternal[(residue, 'C')] = True + # Loop over residues. for chain in self.topology.chains(): + nucleic = any(res.name in dnaResidues or res.name in rnaResidues for res in chain.residues()) chainResidues = list(chain.residues()) for residue in chain.residues(): - if residue.name in self.templates: - template = self.templates[residue.name] + template = self._getTemplate(residue.name) + if template is not None: + # If an atom is marked as terminal only, and if it is bonded to any atom that has an external bond + # to another residue, we need to omit that atom and any other terminal-only atom bonded to it. + + bondedTo = defaultdict(set) + for atom1, atom2 in template.topology.bonds(): + bondedTo[atom1].add(atom2) + bondedTo[atom2].add(atom1) + skip = set() + for atom, terminal in zip(template.topology.atoms(), template.terminal): + if terminal: + for atom2 in bondedTo[atom]: + if hasExternal[(residue, atom2.name)]: + skip.add(atom) + for atom, terminal in zip(template.topology.atoms(), template.terminal): + if terminal: + for atom2 in bondedTo[atom]: + if atom2 in skip: + skip.add(atom) atomNames = set(atom.name for atom in residue.atoms()) - templateAtoms = list(template.topology.atoms()) - if residue == chainResidues[0] and (chain.index, 0) not in self.missingResidues: + templateAtoms = [atom for atom in template.topology.atoms() if atom not in skip] + if nucleic and residue == chainResidues[0] and (chain.index, 0) not in self.missingResidues: templateAtoms = [atom for atom in templateAtoms if atom.name not in ('P', 'OP1', 'OP2')] # Add atoms from the template that are missing. missing = [] for atom in templateAtoms: - if atom.name not in atomNames: + if atom.name not in atomNames and atom.element != app.element.hydrogen: missing.append(atom) if len(missing) > 0: missingAtoms[residue] = missing @@ -1118,9 +1260,22 @@ def _downloadNonstandardDefinitions(self): def _describeVariant(self, residue, definitions): """Build the variant description to pass to addHydrogens() for a residue.""" - if residue.name not in definitions: + if residue.name not in app.PDBFile._standardResidues and self._getTemplate(residue.name) is not None: + # The user has registered a template for this residue. Use the hydrogens from it. + template = self._getTemplate(residue.name) + atoms = [(atom.name, atom.element.symbol.upper(), terminal) for atom, terminal in zip(template.topology.atoms(), template.terminal)] + resAtoms = dict((atom.name, atom) for atom in residue.atoms()) + bonds = [] + for atom1, atom2 in template.topology.bonds(): + if atom1.element == app.element.hydrogen and atom2.name in resAtoms: + bonds.append((atom1.name, atom2.name)) + elif atom2.element == app.element.hydrogen and atom1.name in resAtoms: + bonds.append((atom2.name, atom1.name)) + elif residue.name in definitions: + # We downloaded a definition. + atoms, bonds = definitions[residue.name] + else: return None - atoms, bonds = definitions[residue.name] # See if the heavy atoms are identical. diff --git a/pdbfixer/tests/data/CAS.pdb b/pdbfixer/tests/data/CAS.pdb new file mode 100644 index 0000000..4f377e8 --- /dev/null +++ b/pdbfixer/tests/data/CAS.pdb @@ -0,0 +1,45 @@ +ATOM 1 N CAS A 1 -1.377 -1.201 -2.417 1.00 10.00 N +ATOM 2 CA CAS A 1 -0.664 0.081 -2.349 1.00 10.00 C +ATOM 3 CB CAS A 1 0.084 0.179 -1.018 1.00 10.00 C +ATOM 4 C CAS A 1 0.319 0.169 -3.487 1.00 10.00 C +ATOM 5 O CAS A 1 0.824 -0.836 -3.928 1.00 10.00 O +ATOM 6 OXT CAS A 1 0.635 1.364 -4.010 1.00 10.00 O +ATOM 7 SG CAS A 1 -1.100 0.073 0.350 1.00 10.00 S +ATOM 8 AS CAS A 1 0.299 0.246 2.090 1.00 10.00 AS +ATOM 9 CE1 CAS A 1 -0.737 0.161 3.787 1.00 10.00 C +ATOM 10 CE2 CAS A 1 1.610 -1.249 2.038 1.00 10.00 C +ATOM 11 H CAS A 1 -0.678 -1.925 -2.343 1.00 10.00 H +ATOM 12 H2 CAS A 1 -1.945 -1.261 -1.585 1.00 10.00 H +ATOM 13 HA CAS A 1 -1.380 0.899 -2.424 1.00 10.00 H +ATOM 14 HB2 CAS A 1 0.613 1.131 -0.969 1.00 10.00 H +ATOM 15 HB3 CAS A 1 0.800 -0.638 -0.943 1.00 10.00 H +ATOM 16 HXT CAS A 1 1.267 1.420 -4.740 1.00 10.00 H +ATOM 17 HE11 CAS A 1 -0.056 0.245 4.633 1.00 10.00 H +ATOM 18 HE12 CAS A 1 -1.455 0.980 3.815 1.00 10.00 H +ATOM 19 HE13 CAS A 1 -1.268 -0.789 3.841 1.00 10.00 H +ATOM 20 HE21 CAS A 1 2.178 -1.202 1.109 1.00 10.00 H +ATOM 21 HE22 CAS A 1 2.291 -1.165 2.884 1.00 10.00 H +ATOM 22 HE23 CAS A 1 1.079 -2.200 2.092 1.00 10.00 H +CONECT 1 2 11 12 +CONECT 2 1 3 4 13 +CONECT 3 2 7 14 15 +CONECT 4 2 5 6 +CONECT 5 4 +CONECT 6 4 16 +CONECT 7 3 8 +CONECT 8 7 9 10 +CONECT 9 8 17 18 19 +CONECT 10 8 20 21 22 +CONECT 11 1 +CONECT 12 1 +CONECT 13 2 +CONECT 14 3 +CONECT 15 3 +CONECT 16 6 +CONECT 17 9 +CONECT 18 9 +CONECT 19 9 +CONECT 20 10 +CONECT 21 10 +CONECT 22 10 +END diff --git a/pdbfixer/tests/test_add_hydrogens.py b/pdbfixer/tests/test_add_hydrogens.py index 4ba7ee3..802e49e 100644 --- a/pdbfixer/tests/test_add_hydrogens.py +++ b/pdbfixer/tests/test_add_hydrogens.py @@ -1,11 +1,10 @@ import pdbfixer +import openmm.app as app from pathlib import Path -from io import StringIO def test_nonstandard(): """Test adding hydrogens to nonstandard residues.""" - content = (Path(__file__).parent / "data" / "4JSV.pdb").read_text() - fixer = pdbfixer.PDBFixer(pdbfile=StringIO(content)) + fixer = pdbfixer.PDBFixer(filename=(Path(__file__).parent / "data" / "4JSV.pdb").as_posix()) fixer.removeChains(chainIndices=[0, 1, 2]) fixer.addMissingHydrogens() for residue in fixer.topology.residues(): @@ -17,10 +16,29 @@ def test_nonstandard(): def test_leaving_atoms(): """Test adding hydrogens to a nonstandard residue with leaving atoms.""" - content = (Path(__file__).parent / "data" / "1BHL.pdb").read_text() - fixer = pdbfixer.PDBFixer(pdbfile=StringIO(content)) + fixer = pdbfixer.PDBFixer(filename=(Path(__file__).parent / "data" / "1BHL.pdb").as_posix()) fixer.addMissingHydrogens() for residue in fixer.topology.residues(): count = sum(1 for atom in residue.atoms() if atom.element.symbol == 'H') if residue.name == 'CAS': assert count == 10 + +def test_registered_template(): + """Test adding hydrogens based on a template registered by the user.""" + fixer = pdbfixer.PDBFixer(filename=(Path(__file__).parent / "data" / "1BHL.pdb").as_posix()) + + # Register a template for CAS from which a single hydrogen has been removed. + + pdb = app.PDBFile((Path(__file__).parent / "data" / "CAS.pdb").as_posix()) + modeller = app.Modeller(pdb.topology, pdb.positions) + modeller.delete([list(modeller.topology.atoms())[-1]]) + terminal = [atom.name in ('H2', 'OXT', 'HXT') for atom in modeller.topology.atoms()] + fixer.registerTemplate(modeller.topology, modeller.positions, terminal) + + # See if the correct hydrogens get added. + + fixer.addMissingHydrogens() + for residue in fixer.topology.residues(): + count = sum(1 for atom in residue.atoms() if atom.element.symbol == 'H') + if residue.name == 'CAS': + assert count == 9 diff --git a/pdbfixer/tests/test_build_and_simulate.py b/pdbfixer/tests/test_build_and_simulate.py index 1c4f356..a56d608 100644 --- a/pdbfixer/tests/test_build_and_simulate.py +++ b/pdbfixer/tests/test_build_and_simulate.py @@ -1,7 +1,7 @@ from __future__ import print_function -import pdbfixer -import openmm +from pdbfixer.pdbfixer import PDBFixer +from openmm import app import os import os.path @@ -97,7 +97,7 @@ def test_build_and_simulate(): ] # DEBUG: A few small test cases. - pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '159D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AJU', '1AKG', '1AKX', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AOO'] + pdbcodes_to_build = ['110D', '116D', '117D', '118D', '134D', '135D', '136D', '138D', '143D', '148D', '151D', '152D', '177D', '17RA', '183D', '184D', '186D', '187D', '188D', '189D', '1A11', '1A13', '1A1P', '1A3P', '1A51', '1A60', '1A83', '1A9L', '1AAF', '1AB1', '1ABZ', '1AC7', '1ACW', '1AD7', '1ADX', '1AFP', '1AFT', '1AFX', '1AG7', '1AGG', '1AGL', '1AGT', '1AHL', '1AIE', '1AJ1', '1AJF', '1AJJ', '1AKG', '1AL1', '1ALE', '1ALF', '1ALG', '1AM0', '1AMB', '1AMC', '1AML', '1ANP', '1ANR', '1ANS', '1AOO', '1BH7', '1BX8', '1CEK'] # Don't simulate any. pdbcodes_to_simulate = [] @@ -105,20 +105,12 @@ def test_build_and_simulate(): # Keep track of list of failures. failures = list() + forcefield = app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml') for pdbcode in pdbcodes_to_build: print("------------------------------------------------") print(pdbcode) - output_pdb_filename = 'output.pdb' - - # PDB setup parameters. - # TODO: Try several combinations? - from openmm import unit pH = 7.0 - ionic = 50.0 * unit.millimolar - box = 10.0 * unit.angstrom - positiveIon = 'Na+' - negativeIon = 'Cl-' outfile = tempfile.NamedTemporaryFile(mode='w', delete=False) output_pdb_filename = outfile.name @@ -126,11 +118,17 @@ def test_build_and_simulate(): timeout_seconds = 30 watchdog = Watchdog(timeout_seconds) build_successful = False - try: - from pdbfixer.pdbfixer import PDBFixer - from openmm import app + try: stage = "Creating PDBFixer..." fixer = PDBFixer(pdbid=pdbcode) + stage = "Deleting hydrogens..." + if pdbcode in ['135D', '136D', '177D', '1A83', '1AGG', '1AJ1']: + # These input files include extra hydrogens that aren't supported by the force field. + # To avoid problems, delete all pre-existing hydrogens. + modeller = app.Modeller(fixer.topology, fixer.positions) + modeller.delete([a for a in fixer.topology.atoms() if a.element == app.element.hydrogen]) + fixer.topology = modeller.topology + fixer.positions = modeller.positions stage = "Finding missing residues..." fixer.findMissingResidues() stage = "Finding nonstandard residues..." @@ -147,6 +145,8 @@ def test_build_and_simulate(): fixer.addMissingHydrogens(pH) stage = "Writing PDB file..." app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile) + stage = "Create System..." + forcefield.createSystem(fixer.topology) stage = "Done." outfile.close() build_successful = True diff --git a/pdbfixer/tests/test_mutate.py b/pdbfixer/tests/test_mutate.py index 394bbf8..8efdb8b 100644 --- a/pdbfixer/tests/test_mutate.py +++ b/pdbfixer/tests/test_mutate.py @@ -1,7 +1,7 @@ import openmm.app as app import pdbfixer -import tempfile from pytest import raises +from pathlib import Path def test_mutate_1(): fixer = pdbfixer.PDBFixer(pdbid='1VII') @@ -10,11 +10,7 @@ def test_mutate_1(): fixer.findMissingAtoms() fixer.addMissingAtoms() fixer.addMissingHydrogens(7.0) - with tempfile.NamedTemporaryFile(mode='w+') as temp_pdb: - app.PDBFile.writeFile(fixer.topology, fixer.positions, temp_pdb) - temp_pdb.flush() - pdb = app.PDBFile(temp_pdb.name) - + new_residue57 = list(fixer.topology.residues())[16] assert new_residue57.name == "GLY", "Name of mutated residue did not change correctly!" assert len(list(new_residue57.atoms())) == 7, "Should have 7 atoms in GLY 56" @@ -31,7 +27,6 @@ def test_mutate_2(): fixer.findMissingAtoms() fixer.addMissingAtoms() fixer.addMissingHydrogens(7.0) - temp_pdb = tempfile.NamedTemporaryFile(mode='w+') new_residue57 = list(fixer.topology.residues())[16] new_residue56 = list(fixer.topology.residues())[15] assert new_residue57.name == "LEU", "Name of mutated residue did not change correctly!" @@ -54,12 +49,6 @@ def test_mutate_3_fails(): fixer.applyMutations(["ALA-57-GLY", "SER-57-ALA"], "A") def test_mutate_4_fails(): - with raises(KeyError): - fixer = pdbfixer.PDBFixer(pdbid='1VII') - fixer.applyMutations(["ALA-57-WTF", "SER-56-ALA"], "A") - - -def test_mutate_5_fails(): with raises(KeyError): fixer = pdbfixer.PDBFixer(pdbid='1VII') fixer.applyMutations(["ALA-1000-GLY", "SER-56-ALA"], "A") @@ -68,3 +57,36 @@ def test_mutate_multiple_copies_of_chain_A(): fixer = pdbfixer.PDBFixer(pdbid='1OL5') fixer.applyMutations(['TPO-287-THR','TPO-288-THR'], "A") +def test_mutate_to_nonstandard(): + """Test mutating to a nonstandard residue defined with registerTemplate().""" + fixer = pdbfixer.PDBFixer(filename=(Path(__file__).parent / "data" / "1BHL.pdb").as_posix()) + pdb = app.PDBFile((Path(__file__).parent / "data" / "CAS.pdb").as_posix()) + terminal = [atom.name in ('H2', 'OXT', 'HXT') for atom in pdb.topology.atoms()] + fixer.registerTemplate(pdb.topology, pdb.positions, terminal) + fixer.applyMutations(["SER-57-CAS", "ILE-60-CAS", "ASP-207-CAS"], "A") + fixer.findMissingResidues() + fixer.findMissingAtoms() + fixer.addMissingAtoms() + fixer.addMissingHydrogens(7.0) + residues = list(fixer.topology.residues()) + for i in (0, 3, 134): + assert residues[i].name == "CAS" + atoms = list(residues[i].atoms()) + assert sum(1 for a in atoms if a.name == 'H2') == (1 if i == 0 else 0) + assert sum(1 for a in atoms if a.name == 'OXT') == (1 if i == 134 else 0) + assert sum(1 for a in atoms if a.name == 'HXT') == (1 if i == 134 else 0) + +def test_download_template(): + """Test mutating to a nonstandard residue defined in the PDB.""" + fixer = pdbfixer.PDBFixer(filename=(Path(__file__).parent / "data" / "1BHL.pdb").as_posix()) + fixer.applyMutations(["SER-57-SEP", "ILE-60-SEP", "ASP-207-SEP"], "A") + fixer.findMissingResidues() + fixer.findMissingAtoms() + fixer.addMissingAtoms() + fixer.addMissingHydrogens(7.0) + residues = list(fixer.topology.residues()) + for i in (0, 3, 134): + assert residues[i].name == "SEP" + atoms = list(residues[i].atoms()) + assert sum(1 for a in atoms if a.element == app.element.phosphorus) == 1 + assert sum(1 for a in atoms if a.name == 'OXT') == (1 if i == 134 else 0)