Skip to content

Commit

Permalink
Merge pull request #34 from timbernat/DOP-hotfix
Browse files Browse the repository at this point in the history
Polymer builder overhaul
  • Loading branch information
timbernat authored Dec 12, 2024
2 parents 2b160ee + 5c282d7 commit ce9e052
Show file tree
Hide file tree
Showing 34 changed files with 1,456 additions and 281 deletions.
21 changes: 20 additions & 1 deletion polymerist/genutils/importutils/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
__author__ = 'Timotej Bernat'
__email__ = '[email protected]'

from typing import Callable, ParamSpec, TypeVar
from typing import Callable, Optional, ParamSpec, TypeVar

Params = ParamSpec('Params')
ReturnType = TypeVar('ReturnType')
Expand All @@ -14,6 +14,25 @@
from functools import wraps


class MissingPrerequisitePackage(Exception):
    '''
    Raised when a package dependency cannot be found and the user should be alerted with install instructions

    Parameters
    ----------
    importing_package_name : str
        Name of the package whose import was attempted; quoted in the "try again" hint
    use_case : str
        Short description of the functionality which needs the dependency (e.g. "OpenFF addons")
        Only the first character is upper-cased for display; the remainder is left untouched
    install_link : str
        Location of the installation instructions for the dependency
    dependency_name : str
        Installable name of the dependency (e.g. "openff-toolkit")
    dependency_name_formal : Optional[str], default None
        Human-readable name of the dependency
        Falls back to "dependency_name" when not provided
    '''
    def __init__(self,
            importing_package_name : str,
            use_case : str,
            install_link : str,
            dependency_name : str,
            dependency_name_formal : Optional[str]=None
        ):
        if dependency_name_formal is None:
            dependency_name_formal = dependency_name

        # str.capitalize() lower-cases every character after the first, which mangles
        # mixed-case names ("OpenFF addons" -> "Openff addons"); only touch the first character
        use_case_label = use_case[:1].upper() + use_case[1:]

        message = (
            f'\n{use_case_label} require(s) {dependency_name_formal}, which was not found in the current environment'
            f'\nPlease install `{dependency_name}` by following the installation instructions at {install_link}'
            f'\nThen try importing from "{importing_package_name}" again'
        )

        super().__init__(message)

def module_installed(module_name : str) -> bool:
'''
Check whether a module of the given name is present on the system
Expand Down
41 changes: 0 additions & 41 deletions polymerist/genutils/textual/strsearch.py

This file was deleted.

81 changes: 81 additions & 0 deletions polymerist/genutils/textual/substrings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
'''For identifying and concatenating substrings of other strings with unique properties'''

__author__ = 'Timotej Bernat'
__email__ = '[email protected]'


def unique_string(string : str, preserve_order : bool=True) -> str:
    '''
    Accepts a string and returns another string containing
    only the UNIQUE characters in the original string

    Can specify whether order is important with the "preserve_order" keyword

    Parameters
    ----------
    string : str
        An arbitrary string one wants the unique characters from
    preserve_order : bool, default True
        Whether or not to keep the unique characters in the order they are first found

        For example:
            unique_string("balaclava", preserve_order=True)  -> "balcv"
            unique_string("balaclava", preserve_order=False) -> e.g. "bcavl" (set ordering, not guaranteed)

    Returns
    -------
    uniquified_str : str
        Another string containing only the unique characters in "string"
        Order depends on the value of the "preserve_order" parameter
    '''
    if preserve_order:
        # dict keys are unique and keep insertion order, giving first-occurrence
        # ordering without rescanning a list for every character
        unique_chars = dict.fromkeys(string)
    else:
        unique_chars = set(string)

    return ''.join(unique_chars)

def shortest_repeating_substring(string : str) -> str:
    '''
    Find the shortest prefix of "string" which, repeated a whole number
    of times (possibly just once), reproduces "string" exactly

    The original string is returned when no shorter repeat unit exists
    '''
    doubled = string + string
    # a match of the string inside its own doubling, strictly between the two trivial
    # alignments (offsets 0 and len(string)), marks the length of the repeat unit
    period = doubled.find(string, 1, -1)
    if period == -1:
        return string
    return string[:period]

def repeat_string_to_length(string : str, target_length : int, joiner : str='') -> str:
    '''
    Cycle through the characters of a string until a requested number of them have been emitted
    The final repeat of the string may be cut short

    for example:
    >> repeat_string_to_length("CAT", 6) -> "CATCAT"
    >> repeat_string_to_length("BACA", 10) -> "BACABACABA"

    Parameters
    ----------
    string : str
        An arbitrary (nonempty) string to repeat
    target_length : int
        Number of characters to draw from the repeats of "string"
        Need NOT be an integer multiple of the length of "string"
            E.g. repeat_string_to_length("BACA", 10) -> "BACABACABA"
        Need NOT be greater than the length of "string" either
            E.g. repeat_string_to_length("BACA", 3) -> "BAC"
    joiner : str, default ''
        Separator placed between consecutive repeats (and before a partial final repeat)
        Joiner characters are added on top of "target_length", so a
        nonempty joiner can make the result longer than "target_length"

    Returns
    -------
    rep_string : str
        A new string consisting of cycles of the initial string
        Has exactly the target length when "joiner" is empty

    Raises
    ------
    ValueError
        If "string" is empty
    TypeError
        If "target_length" is not an integer
    IndexError
        If "target_length" is negative
    '''
    if not string:
        raise ValueError(f'Cannot generate nonempty string from any amount of repeats of the empty string')
    if not isinstance(target_length, int):
        raise TypeError(f'Only integer target string lengths are allowed, not non-integer type "{type(target_length).__name__}"')
    if target_length < 0:
        raise IndexError(f'Cannot generate a string of negative length (requested length of {target_length} character(s))')

    full_cycles, leftover = divmod(target_length, len(string))
    pieces = (string,) * full_cycles # tuples kept deliberately; cheaper than the list equivalent
    if leftover: # only append a partial repeat when one exists, so no trailing joiner is emitted
        pieces += (string[:leftover],)

    return joiner.join(pieces)

13 changes: 7 additions & 6 deletions polymerist/mdtools/openfftools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
__email__ = '[email protected]'

# Subpackage-wide precheck to see if OpenFF is even usable in the first place
from ...genutils.importutils.dependencies import modules_installed
from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage
if not modules_installed('openff', 'openff.toolkit'):
raise ModuleNotFoundError(
f'''
OpenFF packages which are required to utilitize {__name__} not found in current environment
Please follow installation instructions at https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html, then retry import
'''
raise MissingPrerequisitePackage(
importing_package_name=__spec__.name,
use_case='OpenFF addons',
install_link='https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html',
dependency_name='openff-toolkit',
dependency_name_formal='the OpenFF software stack',
)

# Import of toplevel OpenFF object registries
Expand Down
2 changes: 1 addition & 1 deletion polymerist/mdtools/openfftools/boxvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from openff.toolkit import Topology
from openff.interchange.components._packmol import _box_vectors_are_in_reduced_form

from .omminter.unitsys import allow_openmm_units, openff_to_openmm
from .unitsys import allow_openmm_units, openff_to_openmm


# CUSTOM TYPES FOR CLARITY, ESPECIALLY WITH UNITS
Expand Down
6 changes: 0 additions & 6 deletions polymerist/mdtools/openfftools/omminter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,3 @@
__email__ = '[email protected]'

from .mdobjects import forcefield_flexible, openff_topology_to_openmm
from .unitsys import (
openmm_to_openff,
openff_to_openmm,
allow_openmm_units,
allow_openff_units,
)
11 changes: 8 additions & 3 deletions polymerist/mdtools/openfftools/omminter/mdobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from openmm.app import Topology as OMMTopology
from openmm.unit import Quantity

from .unitsys import openff_to_openmm
from ..unitsys import openff_to_openmm
from .. import FFDIR
from ..boxvectors import box_vectors_flexible, VectorQuantity, BoxVectorsQuantity

Expand All @@ -39,8 +39,13 @@ def forcefield_flexible(forcefield : Union[ForceField, str, Path]) -> ForceField

return ForceField(ff_path)

def openff_topology_to_openmm(offtop : OFFTopology, forcefield : Union[ForceField, str, Path], box_vecs : Optional[Union[VectorQuantity, BoxVectorsQuantity]]=None,
combine_nonbonded_forces : bool=False, add_constrained_forces : bool=False) -> tuple[OMMTopology, System, Quantity]:
def openff_topology_to_openmm(
offtop : OFFTopology,
forcefield : Union[ForceField, str, Path],
box_vecs : Optional[Union[VectorQuantity, BoxVectorsQuantity]]=None,
combine_nonbonded_forces : bool=False,
add_constrained_forces : bool=False
) -> tuple[OMMTopology, System, Quantity]:
'''Converts an OpenFF Topology to an OpenMM Topology, System, and Positions'''
if box_vecs is not None:
offtop.box_vectors = box_vectors_flexible(box_vecs)
Expand Down
2 changes: 1 addition & 1 deletion polymerist/mdtools/openfftools/solvation/physprops.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from openff.units import Quantity as OFFQuantity

from ....unitutils.dimensions import is_volume
from ..omminter.unitsys import allow_openff_units, openff_to_openmm
from ..unitsys import allow_openff_units, openff_to_openmm


# MASS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from openff.units import unit as offunit

from ... import topology
from ... import TKREGS


def generate_water_TIP3P() -> Molecule:
Expand Down
File renamed without changes.
27 changes: 15 additions & 12 deletions polymerist/mdtools/openmmtools/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from ...genutils.fileutils.pathutils import assemble_path
from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable
from ...genutils.fileutils.jsonio.serialize import PathSerializer
from ...molfiles.pdb import SerialAtomLabeller


# DEFINING AND STORING SIMULATION PATHS
Expand Down Expand Up @@ -119,12 +120,18 @@ def serialize_system(sys_path : Path, system : System) -> None:
file.write(XmlSerializer.serialize(system))

@allow_string_paths
def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions : Union[NDArray, list[Vec3]], keep_chain_and_res_ids : bool=True,
uniquify_atom_ids : bool=True, num_atom_id_digits : int=2, resname_repl : Optional[dict[str, str]]=None) -> None:
def serialize_openmm_pdb(
pdb_path : Path,
topology : OpenMMTopology,
positions : Union[NDArray, list[Vec3]],
keep_chain_and_res_ids : bool=True,
atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(),
resname_map : Optional[dict[str, str]]=None,
) -> None:
'''Configure and write an Protein DataBank File from an OpenMM Topology and array of positions
Provides options to configure atom ID numbering, residue numbering, and residue naming'''
if resname_repl is None:
resname_repl = {} # avoids mutable default
if resname_map is None:
resname_map = {} # avoids mutable default

# chain config
for chain in topology.chains():
Expand All @@ -133,18 +140,14 @@ def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions :
# residue config
for residue in topology.residues():
residue.id = str(residue.id) # avoids TypeError when specifying keepIds during PDB write
repl_res_name = resname_repl.get(residue.name, None) # lookup current residue name to see if a replacement is called for
repl_res_name = resname_map.get(residue.name, None) # lookup current residue name to see if a replacement is called for
if repl_res_name is not None:
residue.name = repl_res_name

# individual atom config
element_counter = Counter() # for keeping track of the running index of each distinct element - could be used to produce a Hill formula
for atom in topology.atoms():
symbol = atom.element.symbol
atom_id = element_counter[symbol]
if uniquify_atom_ids:
atom.name = f'{symbol}{atom_id:0{num_atom_id_digits}d}' # extend atom name with ordered integer with specified number of digits (including leading zeros)
element_counter[symbol] += 1
if atom_labeller: # implicitly, preserves extant atom names if a labeller is not given
for atom in topology.atoms():
atom.name = atom_labeller.get_atom_label(atom.element.symbol)

# file write
with pdb_path.open('w') as file:
Expand Down
4 changes: 4 additions & 0 deletions polymerist/molfiles/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
'''Utilities for reading from and writing to various molecular file formats'''

__author__ = 'Timotej Bernat'
__email__ = '[email protected]'
75 changes: 75 additions & 0 deletions polymerist/molfiles/pdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
'''PDB file formatting tools'''

__author__ = 'Timotej Bernat'
__email__ = '[email protected]'

from dataclasses import dataclass, field
from collections import Counter


@dataclass(frozen=True)
class SerialAtomLabeller:
    '''
    For assigning numbered atom names based on their
    order of appearance within a molecule and elemental class

    Useful, for example, in generating atom names for a PDB file

    Each instance keeps a running per-element tally which is advanced by every call to
    get_atom_label(); the configuration fields are frozen, but that tally is mutated in-place

    Parameters
    ----------
    atom_label_width : int, default 4
        Exact length allotted for any generated atom label; must be non-negative
        Labels shorter than this are right-padded with spaces,
        while labels longer than this are truncated
        (once an index outgrows the available digits, truncation can yield repeated labels)

        Default of 4 is chosen to be compatible with the atom name field of the PDB specification
        https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
    include_elem_idx : bool, default True
        Whether to attach a numerical element-index postfix to atom labels

        E.g. with atom_label_width=4, the fifth carbon in a topology
        will be labelled as "C004" with include_elem_idx=True,
        while labelled as "C   " with include_elem_idx=False
    default_elem_idx : int, default 0
        Starting index for each element category
        By default, is 0-indexed; must be non-negative
    '''
    atom_label_width : int = 4
    include_elem_idx : bool = True
    default_elem_idx : int = 0

    # running tally of the next index to hand out for each element symbol; not settable at init
    element_counter : Counter = field(init=False, default_factory=Counter)

    def __post_init__(self) -> None:
        '''Check ranges on input values'''
        if self.atom_label_width < 0:
            raise ValueError(f'Must provide a non-negative atom label width (provided {self.atom_label_width})')

        if self.default_elem_idx < 0:
            raise ValueError(f'Must provide a non-negative starting index for element indices (provided {self.default_elem_idx})')

    def get_atom_label(self, elem_symbol : str) -> str:
        '''
        Obtain a numbered atom label for an atom based on its element,
        updating the underlying element context in the process

        Parameters
        ----------
        elem_symbol : str
            Symbol of the atom's element (e.g. "C" or "Cl")

        Returns
        -------
        atom_name : str
            Label of exactly "atom_label_width" characters, made of the element symbol and
            (if include_elem_idx is set) its zero-padded running index, space-padded or truncated to fit

        Raises
        ------
        TypeError
            If "elem_symbol" is not a string
        '''
        if not isinstance(elem_symbol, str):
            raise TypeError(f'Must pass symbol of atom\'s element as str (not type {type(elem_symbol).__name__})')

        # first occurrence of an element starts at the configured index; later ones read the running tally
        atom_idx = self.element_counter.setdefault(elem_symbol, self.default_elem_idx)

        atom_idx_label : str = ''
        if self.include_elem_idx:
            num_idx_digits = max(self.atom_label_width - len(elem_symbol), 0) # number of characters left over for an atom index
            atom_idx_label = f'{atom_idx:0{num_idx_digits}d}'

        # pad with spaces if too short, or truncate if too long; slicing guarantees the final width
        atom_name = f'{elem_symbol}{atom_idx_label}'.ljust(self.atom_label_width, ' ')[:self.atom_label_width]

        self.element_counter[elem_symbol] += 1 # tally advances even when the index is not shown in the label

        return atom_name

Loading

0 comments on commit ce9e052

Please sign in to comment.