From b1d26605b65a5b2981a0d17858b858ef7c1f4449 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sun, 2 Oct 2022 04:58:01 -0700 Subject: [PATCH 01/22] add phosphorated residues --- .gitignore | 5 + src/mcsce/cli.py | 2 +- src/mcsce/core/build_definitions.py | 195 +-- src/mcsce/core/data/phosaa14SB.xml | 1075 +++++++++++++++++ src/mcsce/core/data/protein.ff14SB.xml | 9 + src/mcsce/core/data/ptm_rotamer.lib | 211 ++++ src/mcsce/core/definitions.py | 16 +- src/mcsce/core/rotamer_library.py | 136 ++- src/mcsce/core/side_chain_builder.py | 13 +- .../ptm_templates/.DS_Store | Bin 0 -> 6148 bytes .../sidechain_templates/ptm_templates/agm.pdb | 28 + .../sidechain_templates/ptm_templates/aly.pdb | 28 + .../sidechain_templates/ptm_templates/bhd.pdb | 14 + .../sidechain_templates/ptm_templates/cgu.pdb | 18 + .../sidechain_templates/ptm_templates/h1d.pdb | 23 + .../sidechain_templates/ptm_templates/h1e.pdb | 23 + .../sidechain_templates/ptm_templates/h2d.pdb | 22 + .../sidechain_templates/ptm_templates/h2e.pdb | 22 + .../sidechain_templates/ptm_templates/hyp.pdb | 16 + .../sidechain_templates/ptm_templates/kcx.pdb | 26 + .../sidechain_templates/ptm_templates/lyz.pdb | 24 + .../sidechain_templates/ptm_templates/m3l.pdb | 32 + .../sidechain_templates/ptm_templates/men.pdb | 18 + .../sidechain_templates/ptm_templates/mgn.pdb | 21 + .../sidechain_templates/ptm_templates/ptr.pdb | 25 + .../sidechain_templates/ptm_templates/s1p.pdb | 16 + .../sidechain_templates/ptm_templates/sep.pdb | 15 + .../sidechain_templates/ptm_templates/smc.pdb | 15 + .../sidechain_templates/ptm_templates/t1p.pdb | 19 + .../sidechain_templates/ptm_templates/tpo.pdb | 18 + .../sidechain_templates/ptm_templates/tys.pdb | 26 + .../sidechain_templates/ptm_templates/y1p.pdb | 26 + src/mcsce/libs/libenergy.py | 13 +- src/mcsce/libs/libparse.py | 8 +- src/mcsce/libs/libscbuild.py | 207 ++-- src/mcsce/libs/libstructure.py | 5 +- src/mcsce/log.csv | 2 + src/mcsce/mcsce_sidechain.py | 366 +----- src/mcsce/test_func.py | 9 + 39 files changed, 2204 insertions(+), 543 deletions(-) create mode 100644 src/mcsce/core/data/phosaa14SB.xml create mode 100644 src/mcsce/core/data/ptm_rotamer.lib create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/.DS_Store create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/agm.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/aly.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/bhd.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/cgu.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/h1d.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/h1e.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/h2d.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/h2e.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/hyp.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/kcx.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/lyz.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/m3l.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/men.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/mgn.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/ptr.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/s1p.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/sep.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/smc.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/t1p.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/tpo.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/tys.pdb create mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/y1p.pdb create mode 100644 src/mcsce/log.csv create mode 100644 src/mcsce/test_func.py diff --git a/.gitignore b/.gitignore index 7f96b37..3e42cde 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,8 @@ venv.bak/ # Dunbrack library files SimpleOpt1-5/ + +# in development files +src/mcsce/*.pdb +src/mcsce/core/data/glycam_06j.xml +src/mcsce/core/sidechain_templates/cap_templates/ diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index de2f9bb..f3b6b6d 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -68,7 +68,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz f.write("PDB name,Succeeded,Time used\n") ff = forcefields["Amberff14SB"] - ff_obj = ff(add_OXT=True, add_Nterminal_H=True) + ff_obj = ff(Cterminal='OXT', Nterminal='HN') if n_worker is None: import multiprocessing diff --git a/src/mcsce/core/build_definitions.py b/src/mcsce/core/build_definitions.py index 95e86ba..9cbfc40 100644 --- a/src/mcsce/core/build_definitions.py +++ b/src/mcsce/core/build_definitions.py @@ -26,7 +26,12 @@ amber_pdbs = sorted(list( _filepath.joinpath('sidechain_templates', 'amber_names').glob('*.pdb'))) _amber14sb = _filepath.joinpath('data', 'protein.ff14SB.xml') +_amberphosaa = _filepath.joinpath('data', 'phosaa14SB.xml') +#_glycam06 = _filepath.joinpath('data', 'glycam_06j.xml') +ptm_pdbs = sorted(list( + _filepath.joinpath('sidechain_templates', 'ptm_templates').glob('*.pdb'))) +sidechain_pdbs = amber_pdbs + ptm_pdbs # amino-acids atom labels # from: http://www.bmrb.wisc.edu/ref_info/atom_nom.tbl @@ -42,23 +47,27 @@ def _read_labels(pdbs): s.build() a_labels = tuple(s.data_array[:, col_name]) pdb_name = pdb.stem.upper() - pdb_1letter = aa3to1[pdb_name] - - # labels are duplicated for 1-letter and 3-letter codes to avoid - # double dictionary lookup in other implementations + try: + pdb_1letter = aa3to1[pdb_name] + labels_d[pdb_1letter] = a_labels + # labels are duplicated for 1-letter and 3-letter codes to avoid + # double dictionary lookup in other implementations + except KeyError: + # ptm codes + pass labels_d[pdb_name] = a_labels - labels_d[pdb_1letter] = a_labels return labels_d # support figure, for the different histidine protonation states. # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3639364/figure/Fig1/ atom_names_amber = _read_labels(amber_pdbs) +sc_atom_names = _read_labels(sidechain_pdbs) def read_ff14SB_params(): """ - Read Amber protein.ff14SB.xml parameters to a dictionary. - + Read Amber protein.ff14SB.xml phosaaSB14 parameters to a dictionary. + `protein.ff14SB.xml` is in `src.idpconfgen.core.data` folder. Dictionary structure: @@ -103,61 +112,65 @@ def read_ff14SB_params(): ------- dict """ - with open(_amber14sb, 'r') as fin: ff14sb = ET.fromstring(fin.read()) + with open(_amberphosaa, 'r') as fin: + phosaa14sb = ET.fromstring(fin.read()) + #with open(_glycam06, 'r') as fin: + # glycam = ET.fromstring(fin.read()) + parm_libs = [ff14sb, phosaa14sb] ff14SB_params = defaultdict(dict) - - for child in ff14sb: - if child.tag == 'AtomTypes': - for atomtype in child: - key = atomtype.attrib['name'] - ff14SB_params[key].update(atomtype.attrib) - ff14SB_params[key].pop('name') - - elif child.tag == 'Residues': - for residue in child: - for atom in filter(lambda x: x.tag == 'Atom', residue): - key = residue.attrib['name'] - atom_name = atom.attrib['name'] - - atom_par = ff14SB_params[key].setdefault(atom_name, {}) - atom_par.update(atom.attrib) - ff14SB_params[key][atom_name].pop('name') - - elif child.tag == 'NonbondedForce': - ff14SB_params.update(child.attrib) - for atom in child: - if atom.tag == 'Atom': - key = atom.attrib['type'] - ff14SB_params[key].update(atom.attrib) - ff14SB_params[key].pop('type') - - elif child.tag == 'HarmonicAngleForce': - for angle in child: - key = (angle.attrib['type1'], angle.attrib['type2'], angle.attrib['type3']) - value = {k: angle.attrib[k] for k in angle.attrib if 'type' not in k} - - assert key not in ff14SB_params - ff14SB_params[key] = value - - elif child.tag == 'PeriodicTorsionForce': - for torsion in child: - key = (torsion.attrib['type1'], torsion.attrib['type2'], torsion.attrib['type3'], torsion.attrib['type4']) - value = {k: torsion.attrib[k] for k in torsion.attrib if 'type' not in k} - value["tag"] = torsion.tag - - assert key not in ff14SB_params - ff14SB_params[key] = value + + for lib in parm_libs: + for child in lib: + if child.tag == 'AtomTypes': + for atomtype in child: + key = atomtype.attrib['name'] + ff14SB_params[key].update(atomtype.attrib) + ff14SB_params[key].pop('name') + + elif child.tag == 'Residues': + for residue in child: + #if lib is glycam and residue not in ['0YB', '0MA', '0fB']: continue + for atom in filter(lambda x: x.tag == 'Atom', residue): + key = residue.attrib['name'] + atom_name = atom.attrib['name'] + + atom_par = ff14SB_params[key].setdefault(atom_name, {}) + atom_par.update(atom.attrib) + ff14SB_params[key][atom_name].pop('name') + + elif child.tag == 'NonbondedForce': + ff14SB_params.update(child.attrib) + for atom in child: + if atom.tag == 'Atom': + key = atom.attrib['type'] + ff14SB_params[key].update(atom.attrib) + ff14SB_params[key].pop('type') + + elif child.tag == 'HarmonicAngleForce': + for angle in child: + key = (angle.attrib['type1'], angle.attrib['type2'], angle.attrib['type3']) + value = {k: angle.attrib[k] for k in angle.attrib if 'type' not in k} + assert key not in ff14SB_params + ff14SB_params[key] = value + + elif child.tag == 'PeriodicTorsionForce': + for torsion in child: + key = (torsion.attrib['type1'], torsion.attrib['type2'], torsion.attrib['type3'], torsion.attrib['type4']) + value = {k: torsion.attrib[k] for k in torsion.attrib if 'type' not in k} + value["tag"] = torsion.tag + assert key not in ff14SB_params + ff14SB_params[key] = value return ff14SB_params def generate_residue_template_topology( pdb_files, residue_labels, - add_OXT=True, - add_Nterminal_H=True, + Cterminal = 'OXT', + Nterminal = 'HN', ): """ Generate topology for the residue templates. @@ -167,6 +180,9 @@ def generate_residue_template_topology( Generated topology represents all vs. all. Meaning, if atom A is covalently bond to atom B, B appears in A records and A in B records, as well. + + Nterminal options {'HN', 'ACE', 'PCA'} + Cterminal options {'OXT', 'NME'} Returns ------- @@ -176,6 +192,7 @@ def generate_residue_template_topology( # Creates topoloty of amino acids templates, that is, a dictionary of # all vs. all covalent bond pairs res_covalent_bonds = defaultdict(dict) + for pdb in pdb_files: pdbname = pdb.stem.upper() @@ -195,9 +212,9 @@ def generate_residue_template_topology( ) all_dists = pdist(coords) - # note that 1.6 AA wont capture the S bonds which are 1.8 A away + # note that 1.6 AA wont capture the S-O S-C bonds which are 1.8 A away # the Cys and Met special cases are treated after the loop - cov_bonds = all_dists <= 1.6 + cov_bonds = all_dists <= 1.65 # all vs. all atom pairs atom_pairs = ( @@ -216,37 +233,43 @@ def generate_residue_template_topology( # special cases: CYS and MET res_covalent_bonds['CYS']['CB'].append('SG') - res_covalent_bonds['CYS']['SG'] = [] - res_covalent_bonds['CYS']['SG'].extend(('CB', 'HG')) - res_covalent_bonds['CYS']['HG'] = [] - res_covalent_bonds['CYS']['HG'].append('SG') + res_covalent_bonds['CYS']['SG'].append('CB') res_covalent_bonds['MET']['CG'].append('SD') - res_covalent_bonds['MET']['SD'] = [] - res_covalent_bonds['MET']['SD'].extend(('CG', 'CE')) + res_covalent_bonds['MET']['SD'] = ['CG', 'CE'] res_covalent_bonds['MET']['CE'].append('SD') + + # ptm special cases: SMC TYS phosphorated + res_covalent_bonds['SMC']['CB'].append('SG') + res_covalent_bonds['SMC']['SG'] = ['CB', 'CS'] + res_covalent_bonds['SMC']['CS'].append('SG') + + res_covalent_bonds['TYS']['OH'].append('S') + res_covalent_bonds['TYS']['S'].extend(('HO3', 'O3S')) + + # special terminal case + if Nterminal == 'ACE': + res_covalent_bonds['ACE']['C'] = ['O', 'CH3'] + res_covalent_bonds['ACE']['O'] = ['C'] + res_covalent_bonds['ACE']['CH3'] = ['C', 'HH31', 'HH32', 'HH33'] + for h in ('HH31', 'HH32', 'HH33'): + res_covalent_bonds['ACE'][h] = ["CH3"] + if Cterminal == 'NME': + res_covalent_bonds['NME']['N'] = ['CH3'] + res_covalent_bonds['NME']['CH3'] = ['N', 'HH31', 'HH32', 'HH33'] + for h in ('HH31', 'HH32', 'HH33'): + res_covalent_bonds['NME'][h] = ["CH3"] # asserts all atoms are considered for k1, v1 in res_covalent_bonds.items(): assert len(v1) == len(residue_labels[k1]), k1 # add OXT connectivity - if add_OXT: + if Cterminal == 'OXT': add_OXT_to_connectivity(v1) - ## added 'OXT' connectivity - #for atom, connects in v1.items(): - # if 'O' in connects: - # connects.append('OXT') - - ## this should be only 'C' - #v1['OXT'] = copy(v1['O']) - # if k1 == 'PRO': # why should proline be skipped? - # continue - - if add_Nterminal_H: + if Nterminal == 'HN': add_Nterm_H_connectivity(v1) - return res_covalent_bonds @@ -268,6 +291,15 @@ def add_Nterm_H_connectivity(connectivity_dict): # connectivity_dict[h] = copy(connectivity_dict['H']) connectivity_dict[h] = ["N"] +def add_Nterm_methyl_conn(connectivity_dict): + for atom, list_of_connects in connectivity_dict.items(): + # if 'H' in list_of_connects: + if atom == "N": + list_of_connects.append('CH3') + + connectivity_dict['CH3'] = ['HH31', 'HH32', 'HH33'] + for h in ('HH31', 'HH32', 'HH33'): + connectivity_dict[h] = ["CH3"] def add_OXT_to_connectivity(connectivity_dict): """Add OXT connectivity to residue.""" @@ -360,7 +392,6 @@ def expand_topology_bonds_apart(cov_bond_dict, bonds_apart): res_d = expanded_topology.setdefault(res, {}) for atom in cov_bond_dict[res]: - atoms_X_bonds_apart = res_d.setdefault(atom, []) cov_bonded = copy(cov_bond_dict[res][atom]) @@ -427,16 +458,17 @@ def topology_3_bonds_apart(covalent_bond_dict): # interresidue exact 3 bonds connectivity bonds_equal_1_inter = {'C': ['N']} - +# TODO: check Nterminal caps bonds_equal_3_inter = { 'N': ['N'], 'CA': ['H', 'CA'], - 'C': ['HA', 'HA2', 'HA3', 'CB', 'C'], + 'C': ['HA', 'HA2', 'HA3', 'CB', 'C', 'CH3'], 'O': ['CA', 'H'], 'HA': ['N'], 'HA2': ['N'], 'HA3': ['N'], 'CB': ['N'], + #'CH3': ['HA', 'HA2', 'HA3', 'CB', 'C'] } # / @@ -519,13 +551,13 @@ def __new__(cls, *args, **kwargs): def __init__(self, *args, **kwargs): - self.atom_names = atom_names_amber + self.atom_names = sc_atom_names self.forcefield = read_ff14SB_params() self.res_topology = generate_residue_template_topology( - amber_pdbs, - atom_names_amber, + sidechain_pdbs, + sc_atom_names, **kwargs) self.bonds_eq3_intra = topology_3_bonds_apart(self.res_topology) @@ -558,7 +590,7 @@ def __init__(self, *args, **kwargs): 3: bonds_le_3_inter, #4: inter_4_connect, } - +#TODO: add ptm residues stats # bend angles are in radians # bend angle for the CA-C-O bond was virtually the same, so will be computed # as a single value. Changes in CA-C-Np1 create variations in Np1-C-O according @@ -752,9 +784,10 @@ def _get_structure_coords(path_): sidechain_templates = { pdb.stem.upper(): _get_structure_coords(pdb) - for pdb in amber_pdbs + for pdb in sidechain_pdbs } + # these template coordinates were created using Chimera-X daily given # a N-terminal at 0,0,0 and a CA along the X axis. n_terminal_h_coords_at_origin = np.array([ diff --git a/src/mcsce/core/data/phosaa14SB.xml b/src/mcsce/core/data/phosaa14SB.xml new file mode 100644 index 0000000..42203d8 --- /dev/null +++ b/src/mcsce/core/data/phosaa14SB.xml @@ -0,0 +1,1075 @@ + + + 2021-01-26 + leaprc.phosaa14SB + L. Raguette; A. Cuomo; K. Belfon; C. Tian; Q. Wu; C. Simmerling. Updated Amber force field parameters for phosphorylated amino acids for ff14SB and ff19SB. In Prep, 2020; Other residues generated using antechamber by oz + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mcsce/core/data/protein.ff14SB.xml b/src/mcsce/core/data/protein.ff14SB.xml index 909706d..d623bc5 100644 --- a/src/mcsce/core/data/protein.ff14SB.xml +++ b/src/mcsce/core/data/protein.ff14SB.xml @@ -3440,5 +3440,14 @@ + + + + + + + + + diff --git a/src/mcsce/core/data/ptm_rotamer.lib b/src/mcsce/core/data/ptm_rotamer.lib new file mode 100644 index 0000000..6a205d5 --- /dev/null +++ b/src/mcsce/core/data/ptm_rotamer.lib @@ -0,0 +1,211 @@ +# https://download.igb.uci.edu/rotamer_lib.txt +# K. Nagata, A. Randall, & P. Baldi. SIDEpro: a novel machine learning approach for the fast and accurate prediction of side-chain conformations. Proteins , vol. 80, 1, (2012) + +# SEP +# Number of chi angles: 3 +# Number of additional chi angles: 2 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi2Val chi3Val chi2Sig chi3Sig +SEP 0 120 0.09677 101.8 180 14.76 10 +SEP 0 120 0.6452 177.6 180 33.09 10 +SEP 0 120 0.2581 277.6 180 32.18 10 +SEP 120 240 0.2188 101.8 180 14.76 10 +SEP 120 240 0.5625 177.6 180 33.09 10 +SEP 120 240 0.2188 277.6 180 32.18 10 +SEP 240 360 0.4062 101.8 180 14.76 10 +SEP 240 360 0.5625 177.6 180 33.09 10 +SEP 240 360 0.03125 277.6 180 32.18 10 + +# TPO +# Number of chi angles: 3 +# Number of additional chi angles: 2 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi2Val chi3Val chi2Sig chi3Sig +TPO 0 120 0.2308 107 180 15.23 10 +TPO 0 120 0.5385 148.2 180 19.58 10 +TPO 0 120 0.2308 298.1 180 15.35 10 +TPO 120 240 0.5 107 180 15.23 10 +TPO 120 240 0.25 148.2 180 19.58 10 +TPO 120 240 0.25 298.1 180 15.35 10 +TPO 240 360 0.5909 107 180 15.23 10 +TPO 240 360 0.3636 148.2 180 19.58 10 +TPO 240 360 0.04545 298.1 180 15.35 10 + +# PTR +# Number of chi angles: 4 +# Number of additional chi angles: 2 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi3Val chi4Val chi3Sig chi4Sig +PTR 0 120 0.5 -112.6 180 25.84 10 +PTR 0 120 0.375 8.698 180 33.41 10 +PTR 0 120 0.125 96.21 180 18.23 10 +PTR 120 240 0.4 -112.6 180 25.84 10 +PTR 120 240 0.05 8.698 180 33.41 10 +PTR 120 240 0.55 96.21 180 18.23 10 +PTR 240 360 0.44 -112.6 180 25.84 10 +PTR 240 360 0.22 8.698 180 33.41 10 +PTR 240 360 0.34 96.21 180 18.23 10 + +# ABA +# Number of chi angles: 1 +# Number of additional chi angles: 1 +# No Dependence +# +#AA Prob chi1Val chi1Sig +ABA 0.5217 -62.309 23.7762 +ABA 0.2996 174.310 33.0081 +ABA 0.1787 60.8882 32.0212 + +# CSO +# Number of chi angles: 2 +# Number of additional chi angles: 1 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi2Val chi2Sig +CSO 0 120 0.1667 77.52 20.4 +CSO 0 120 0.3333 183.4 32.83 +CSO 0 120 0.5 285.4 22.18 +CSO 120 240 0.6538 77.52 20.4 +CSO 120 240 0.1667 183.4 32.83 +CSO 120 240 0.1795 285.4 22.18 +CSO 240 360 0.3206 77.52 20.4 +CSO 240 360 0.1069 183.4 32.83 +CSO 240 360 0.5725 285.4 22.18 + +# CSD +# Number of chi angles: 2 +# Number of additional chi angles: 1 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi2Val chi2Sig +CSD 0 120 0.381 64.29 27.26 +CSD 0 120 0.4286 179 20.24 +CSD 0 120 0.1905 297.8 15.46 +CSD 120 240 0.25 64.29 27.26 +CSD 120 240 0.15 179 20.24 +CSD 120 240 0.6 297.8 15.46 +CSD 240 360 0.4118 64.29 27.26 +CSD 240 360 0.1569 179 20.24 +CSD 240 360 0.4314 297.8 15.46 + +# CME +# Number of chi angles: 5 +# Number of additional chi angles: 4 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi2Val chi3Val chi4Val chi5Val chi2Sig chi3Sig +chi4Sig chi5Sig +CME 0 120 0.05 101.4 83.31 180 300 95.19 41.85 +10 10 +CME 0 120 0.2 101.4 83.31 180 300 257.7 41.85 +10 10 +CME 0 120 0.05 101.4 272.6 180 300 95.19 41.85 +10 10 +CME 0 120 0.05 101.4 272.6 180 300 257.7 41.85 +10 10 +CME 0 120 0.35 260.6 83.31 180 300 95.19 51.5 +10 10 +CME 0 120 0.05 260.6 83.31 180 300 257.7 51.5 +10 10 +CME 0 120 0.2 260.6 272.6 180 300 95.19 51.5 +10 10 +CME 0 120 0.05 260.6 272.6 180 300 257.7 51.5 +10 10 +CME 120 240 0.2558 101.4 83.31 180 300 95.19 41.85 +10 10 +CME 120 240 0.3488 101.4 83.31 180 300 257.7 41.85 +10 10 +CME 120 240 0.04651 101.4 272.6 180 300 95.19 41.85 +10 10 +CME 120 240 0.06977 101.4 272.6 180 300 257.7 41.85 +10 10 +CME 120 240 0.02326 260.6 83.31 180 300 95.19 51.5 +10 10 +CME 120 240 0.09302 260.6 83.31 180 300 257.7 51.5 +10 10 +CME 120 240 0.09302 260.6 272.6 180 300 95.19 51.5 +10 10 +CME 120 240 0.06977 260.6 272.6 180 300 257.7 51.5 +10 10 +CME 240 360 0.02326 101.4 83.31 180 300 95.19 41.85 +10 10 +CME 240 360 0.04651 101.4 83.31 180 300 257.7 41.85 +10 10 +CME 240 360 0.09302 101.4 272.6 180 300 95.19 41.85 +10 10 +CME 240 360 0.06977 101.4 272.6 180 300 257.7 41.85 +10 10 +CME 240 360 0.4186 260.6 83.31 180 300 95.19 51.5 +10 10 +CME 240 360 0.02326 260.6 83.31 180 300 257.7 51.5 +10 10 +CME 240 360 0.1395 260.6 272.6 180 300 95.19 51.5 +10 10 +CME 240 360 0.186 260.6 272.6 180 300 257.7 51.5 +10 10 + +# OCS +# Number of chi angles: 2 +# Number of additional chi angles: 1 +# Dependence: chi 1 +# +#AA Chi1-range Prob chi2Val chi2Sig +OCS 0 120 1 180 10 +OCS 120 240 1 180 10 +OCS 240 360 1 180 10 + +# KCX +# Number of chi angles: 6 +# Number of additional chi angles: 2 +# Dependence: chi 4 +# +#AA Chi4-range Prob chi5Val chi6Val chi5Sig chi6Sig +KCX 0 120 0.05882 80.47 0 18.55 10 +KCX 0 120 0.2941 185 0 18.3 10 +KCX 0 120 0.6471 261.4 0 10.69 10 +KCX 120 240 0.1364 80.47 0 18.55 10 +KCX 120 240 0.6364 185 0 18.3 10 +KCX 120 240 0.2273 261.4 0 10.69 10 +KCX 240 360 0.05 80.47 0 18.55 10 +KCX 240 360 0.75 185 0 18.3 10 +KCX 240 360 0.2 261.4 0 10.69 10 + +# MLY +# Number of chi angles: 5 +# Number of additional chi angles: 1 +# Dependence: chi 4 +# +#AA Chi4-range Prob chi5Val chi5Sig +MLY 0 120 0.75 65.93 26.82 +MLY 0 120 0.15 167.1 22.44 +MLY 0 120 0.1 296 26.45 +MLY 120 240 0.4273 65.93 26.82 +MLY 120 240 0.4053 167.1 22.44 +MLY 120 240 0.1674 296 26.45 +MLY 240 360 0.5714 65.93 26.82 +MLY 240 360 0.3333 167.1 22.44 +MLY 240 360 0.09524 296 26.45 + +# M3L +# Number of chi angles: 5 +# Number of additional chi angles: 1 +# Dependence: chi 4 +# +#AA Chi4-range Prob chi5Val chi5Sig +M3L 0 120 1 180 10 +M3L 120 240 1 180 10 +M3L 240 360 1 180 10 + +# HYP +# Number of chi angles: 2 +# Number of additional chi angles: 2 +# No Dependence +# +#AA Prob chi1Val chi2Val chi1Sig chi2Sig +HYP 0.017467 21.374 32.968 9.7573 10.801 +HYP 0.19651 21.374 333.5 9.7573 11.163 +HYP 0.78603 336.38 32.968 7.9058 10.801 + diff --git a/src/mcsce/core/definitions.py b/src/mcsce/core/definitions.py index 3142241..7643831 100644 --- a/src/mcsce/core/definitions.py +++ b/src/mcsce/core/definitions.py @@ -29,7 +29,13 @@ bgeo_NCaC = 'N_Ca_C' bgeo_CaCNp1 = 'Ca_C_Np1' - +#TODO: ptm codes test +ptm_aa = {'AGM':'ARG', 'MEN':'ASN', 'BHD':'ASP', 'SMC':'CYS', 'MGN':'GLN', 'CGU':'GLU', + 'H2E':'HIS', 'H2D':'HIS', 'H1E':'HIS', 'H1D':'HIS', + 'HYP':'PRO', 'SEP':'SER', 'TPO':'THR', 'S1P':'SER', 'T1P':'THR', 'Y1P':'TYR', + 'PTR':'TYR', 'TYS':'TYR', 'LYZ':'LYS', 'ALY':'LYS', 'M3L':'LYS', 'KCX':'LYS' + } +ptm_h = {'H1E':'H2E', 'H1D':'H2D', 'S1P':'SEP', 'T1P':'TPO', 'Y1P':'PTR'} # Amino-acid 3 to 1 letter code dictionary aa3to1 = { 'ALA': 'A', @@ -504,6 +510,14 @@ } } +# residue sidechain numbers +# 1: ['VAL', 'SER', 'THR', 'CYS'] +# 2: ['ASP', 'ASN' ,'HIS', 'ILE', 'TRP', 'PRO', 'PHE', 'LEU', 'TYR', +# 'SEP', 'TPO', 'TYS', 'HYP', 'BHD', 'PTR', 'SMC', 'H2D', 'H2E'] +# 3: ['GLN', 'GLU', 'MET', 'CGU', 'MEN', 'MGN'] +# 4: ['LYS', 'LYZ', 'M3L'] +# 5: ['ARG', 'ALY', 'AGM', 'KCX'] + # Builder Definitions ### # average values of the backbone angles calculated from # Dunbrack PISCES diff --git a/src/mcsce/core/rotamer_library.py b/src/mcsce/core/rotamer_library.py index 7f16965..64bd941 100644 --- a/src/mcsce/core/rotamer_library.py +++ b/src/mcsce/core/rotamer_library.py @@ -5,22 +5,39 @@ Coded by Jie Li Date created: Jul 28, 2021 +Modified by Oufan Zhang to add ptm rotamers """ from pathlib import Path import numpy as np +from mcsce.core.definitions import ptm_aa, ptm_h _filepath = Path(__file__).resolve().parent # folder _library_path = _filepath.joinpath('data', 'SimpleOpt1-5', 'ALL.bbdep.rotamers.lib') +_ptm_library_path = _filepath.joinpath('data', 'ptm_rotamer.lib') -def get_closest_angle(input_angle): +def get_closest_angle(input_angle, degsep=10): """ Find closest angle in [-180, +170] with 10 degree separations """ - if input_angle > 175: + input_angle += 180 + if input_angle > 175+180: # the closest would be 180, namely -180 return -180 - return round(input_angle / 10) * 10 + return round(input_angle / degsep) * degsep - 180 + +def sample_torsion(data, nchis): + assert data.shape[1] == nchis*2 + vals = data[:, :nchis] + sigs = data[:, -nchis:] + return np.random.normal(vals, sigs) + +def wrap_angles(rot_angle): + more_mask = rot_angle > 180. + rot_angle[more_mask] -= 360. + less_mask = rot_angle < -180. + rot_angle[less_mask] += 360. + return rot_angle class DunbrakRotamerLibrary: """ @@ -34,6 +51,7 @@ def __init__(self, probability_threshold=0.001, augment_with_std=True) -> None: following the implementation in Bhowmick, Asmit, and Teresa Head-Gordon. "A Monte Carlo method for generating side chain structural ensembles." Structure 23.1 (2015): 44-55. """ self._data = {} + self.prob_threshold = probability_threshold with open(_library_path) as f: for line in f: line = line.strip() @@ -98,20 +116,120 @@ def __init__(self, probability_threshold=0.001, augment_with_std=True) -> None: # residues with chi1 to chi3 chis = chis[:, :3] probabilities = np.concatenate(self._data[item][1]) - self._data[item] = [chis, probabilities] + self._data[item] = [wrap_angles(chis), probabilities] - def retrieve_torsion_and_prob(self, residue_type, phi, psi): - if residue_type in ["HID", "HIE", "HIP"]: - residue_type = "HIS" + def retrieve_torsion_and_prob(self, residue_type, phi, psi, ptmlib): if np.isnan(phi): # This is the first residue, which do not have phi, so use phi=-180 as default phi = -180 if np.isnan(psi): # this is the last residue, which do not have psi, so use psi=-180 as default psi = -180 - # otherwise, return the library contents corresponding to closest phi and psi + + if residue_type in ptm_aa: + chis, probs = self._data[(ptm_aa[residue_type], get_closest_angle(phi), get_closest_angle(psi))] + # phosphate protonation states + if residue_type in ['S1P', 'T1P', 'Y1P', 'H1D', 'H1E']: + residue_type = ptm_h[residue_type] + if residue_type not in ptmlib._info: + print("ptm residue rotamers not provided, assumes unmodified residue") + return [chis, probs] + + # search ptm library + ptm_info = ptmlib.get_dependence(residue_type) + nchis = ptm_info[1] + + if ptm_info[-1] == 0: + ptm_data = ptmlib.retrieve_torsion_and_prob(residue_type, 360.) + ptm_probs = np.array(ptm_data)[:, 0] + torsions = wrap_angles(sample_torsion(np.array(ptm_data)[:, 1:], nchis)-180.) + assert torsions.shape[1] == ptm_info[0] + return [torsions, ptm_probs] + + dchis = chis[:, ptm_info[-1]-1] + new_chis = [] + new_probs = [] + for i in range(chis.shape[0]): + ptm_data = ptmlib.retrieve_torsion_and_prob(residue_type, dchis[i]) + ptm_probs = np.array(ptm_data)[:, 0] + torsions = wrap_angles(sample_torsion(np.array(ptm_data)[:, 1:], nchis)-180.) + for j in range(len(ptm_probs)): + p = probs[i]*ptm_probs[j] + if p > self.prob_threshold: + new_chis.append(np.concatenate((chis[i], torsions[j]))) + new_probs.append(p) + assert len(new_chis[0]) == ptm_info[0] + return [np.array(new_chis), np.array(new_probs)] + + if residue_type in ["HID", "HIE", "HIP"]: + residue_type = "HIS" + return self._data[(residue_type, get_closest_angle(phi), get_closest_angle(psi))] + +class ptmRotamerLib(): + """ + data structure: {(restype, depended chi): [np.array(N: number of rotamers, c: (probability, rotamer values, sigma values in degree)]} + info structure: {(restype: [total chis, number of additional chis, dependence])} + """ + def __init__(self, probability_threshold=0.001): + self._data = {} + self._info = {} + with open(_ptm_library_path) as f: + for line in f: + line = line.strip() + # read chi information + if len(line) == 5 and line.startswith("# "): + restype = line.split()[-1] + self._info[restype] = [] + continue + if line.startswith("# Number of chi"): + self._info[restype].append(int(line.split()[-1])) + continue + if line.startswith("# Number of additional chi"): + self._info[restype].append(int(line.split()[-1])) + continue + if line.startswith("# Dependence"): + self._info[restype].append(int(line.split()[-1])) + continue + if line.startswith("# No Dependence"): + self._info[restype].append(0) + continue + if line.startswith("#") or len(line) == 0: + # other commented line + continue + # read in rotamers + if line.startswith(restype): + items = line.split() + if self._info[restype][-1] > 0: + # with dependence + chid = float(items[1]) - 180. + label = (restype, chid) + if label in self._data: + self._data[label].append([float(n) for n in items[3:]]) + else: + self._data[label] = [[float(n) for n in items[3:]]] + else: + chid = 360. + label = (restype, chid) + if label in self._data: + self._data[label].append([float(n) for n in items[1:]]) + else: + self._data[label] = [[float(n) for n in items[1:]]] + + def get_dependence(self, residue_type): + return self._info[residue_type] + + def retrieve_torsion_and_prob(self, residue_type, dchi=360.): + if self._info[residue_type][-1] == 0: + return self._data[(residue_type, 360.)] + else: + rchi = get_closest_angle(dchi, 120.) + if rchi == 180: rchi -= 120. + return self._data[(residue_type, rchi)] + + if __name__ == "__main__": library = DunbrakRotamerLibrary() - print(library.retrieve_torsion_and_prob("ARG", 73, -54.8)) \ No newline at end of file + ptmlib = ptmRotamerLib() + print(library.retrieve_torsion_and_prob("HYP", 73, -54.8, ptmlib)) diff --git a/src/mcsce/core/side_chain_builder.py b/src/mcsce/core/side_chain_builder.py index 34fbfb6..24b5d54 100644 --- a/src/mcsce/core/side_chain_builder.py +++ b/src/mcsce/core/side_chain_builder.py @@ -12,7 +12,7 @@ import numba as nb from numba import jit, njit -from mcsce.core.rotamer_library import DunbrakRotamerLibrary +from mcsce.core.rotamer_library import DunbrakRotamerLibrary, ptmRotamerLib from mcsce.core.definitions import aa3to1 from mcsce.core.build_definitions import sidechain_templates from mcsce.libs.libcalc import calc_torsion_angles, place_sidechain_template @@ -25,6 +25,7 @@ _rotamer_library = DunbrakRotamerLibrary() +_ptm_rotamer_lib = ptmRotamerLib() sidechain_placeholders = [] energy_calculators = [] @@ -58,7 +59,7 @@ def initialize_func_calc(efunc_creator, aa_seq=None, structure=None): # declare global variabales global sidechain_placeholders global energy_calculators - + sidechain_placeholders = [] energy_calculators = [] print("Start preparing energy calculators at different sidechain completion levels") @@ -132,6 +133,7 @@ def create_side_chain_structure(inputs): all_psi = np.concatenate([all_backbone_dihedrals[::3], [np.nan]]) * 180 / np.pi structure_coords = backbone_coords accumulated_energy = energy_calculators[0](structure_coords[None], structure_coords[None, :0])[0] # energies of backbone only + for idx, resname in enumerate(structure.residue_types): # copy coordinates from the previous growing step to the current placeholder previous_coords = structure_coords @@ -150,10 +152,9 @@ def create_side_chain_structure(inputs): continue energy_func = energy_calculators[idx + 1] # get all candidate conformations (rotamers) for this side chain - candidiate_conformations, candidate_probs = _rotamer_library.retrieve_torsion_and_prob(resname, all_phi[idx], all_psi[idx]) + candidiate_conformations, candidate_probs = _rotamer_library.retrieve_torsion_and_prob(resname, all_phi[idx], all_psi[idx], _ptm_rotamer_lib) # perturb chi angles of the side chains by ~0.5 degrees candidiate_conformations += np.random.normal(scale=0.5, size=candidiate_conformations.shape) - energies = [] all_coords = np.tile(structure_coords[None], (len(candidiate_conformations), 1, 1)) @@ -164,8 +165,8 @@ def create_side_chain_structure(inputs): all_coords[tor_idx, -n_sidechain_atoms:] = sc_conformation[sidechain_atom_idx] energies = energy_func(all_coords[:, -n_sidechain_atoms:], all_coords[:, : -n_sidechain_atoms]) minimum_energy = min(energies) # Keep track of the minimum energy so that the renormalized energies can be converted back - - # print(idx, resname, len(candidate_probs), (~np.isinf(energies)).sum()) + + print(idx, resname, len(candidate_probs), np.isinf(energies).sum()) # If all energies are inf, end this growth if np.isinf(energies).all(): return None, False, None, None diff --git a/src/mcsce/core/sidechain_templates/ptm_templates/.DS_Store b/src/mcsce/core/sidechain_templates/ptm_templates/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..f5ace363dc42a2013f33bd93dcac2d7a93c1bdcf GIT binary patch literal 6148 zcmeHK%}&BV5S~F22_ivHUQGG~B)&loM{63=vgU0A zrhqB%+Z2#@w?`A2(gSVq?tZ_&DJ8f0Vw&WO1U~pc@C$JGp{CR5gEgTQCA6StaA#m| z_1&*?-@$k-agV{BBcpsNYTk5>Z2gk=8D)r{BA3hLs+`jks&0(HlCNCrc_qS9W96tJ z7^5|-#OT6%jS42{m(6*VQ=VrZo*6j85X=xGZRYZ2MWZHeXuh&Ghw=t$rndv!HB??T z*DNcwr*pOD4Q3qR>U_&C_D2kMbc{#d zFZNhCbaY~W_+X!z{RxFNJK{&`P8>V5-V`te_7ymCKL@h^--OTq`y~5i3YY?aN&#+< z@5Uoc$?vVT$;n np.pi: - rot_angle -= 2*np.pi + rot_angle -= 2*np.pi elif rot_angle < -np.pi: - rot_angle += 2*np.pi + rot_angle += 2*np.pi rotated = rotate_coordinates_Q(displaced, -unit_vector, rot_angle) return rotated + offset @@ -74,7 +74,7 @@ def rotate_sidechain(res_type, tors): Original version written by Oufan Zhang, modified by Jie Li """ - assert not res_type in ['ALA', 'GLY'] + assert not res_type in ['ALA', 'GLY', 'PCA'] # convert degrees to radians tors = tors * np.pi / 180 @@ -83,108 +83,27 @@ def rotate_sidechain(res_type, tors): template_structure, sidechain_idx = sidechain_templates[res_type] sidechain_label = template_structure.data_array[:, 2] # atom name column template = template_structure.coords - - # chi1 angles in template - if res_type == 'SER': - N_CA_CB_CG = [np.where(sidechain_label=='N')[0][0], - np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='OG')[0][0]] - elif res_type == 'THR': - N_CA_CB_CG = [np.where(sidechain_label=='N')[0][0], - np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='OG1')[0][0]] - elif res_type in ['ILE', 'VAL']: - N_CA_CB_CG = [np.where(sidechain_label=='N')[0][0], - np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG1')[0][0]] - elif res_type == "CYS": - N_CA_CB_CG = [np.where(sidechain_label=='N')[0][0], - np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='SG')[0][0]] - else: - N_CA_CB_CG = [np.where(sidechain_label=='N')[0][0], - np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0]] - ori_chi1 = calc_torsion_angles(template[N_CA_CB_CG, :])[0] # The original chi1 torsion angle value - - # chi2 angles in template - if res_type == 'MET': - CA_CB_CG_CD = [np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='SD')[0][0]] - CB_CG_CD_CE = [np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='SD')[0][0], - np.where(sidechain_label=='CE')[0][0]] - elif res_type == 'ILE': - CA_CB_CG_CD = [np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG1')[0][0], - np.where(sidechain_label=='CD1')[0][0]] - elif res_type in ['HIS', 'HIP', 'HID', 'HIE']: - CA_CB_CG_CD = [np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='ND1')[0][0]] - elif res_type in [ 'ASN', 'ASP']: - CA_CB_CG_CD = [np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='OD1')[0][0]] - elif res_type in ['TRP', 'PHE', 'LEU', 'TYR']: - CA_CB_CG_CD = [np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD1')[0][0]] - elif res_type not in ['SER', 'THR', 'VAL', 'CYS']: - CA_CB_CG_CD = [np.where(sidechain_label=='CA')[0][0], - np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD')[0][0]] - - # rest of residues - if res_type in ['GLN','GLU']: - CB_CG_CD_CE = [np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD')[0][0], - np.where(sidechain_label=='OE1')[0][0]] - elif res_type == 'LYS': - CB_CG_CD_CE = [np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD')[0][0], - np.where(sidechain_label=='CE')[0][0]] - CG_CD_CE_CZ = [np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD')[0][0], - np.where(sidechain_label=='CE')[0][0], - np.where(sidechain_label=='NZ')[0][0]] - elif res_type == 'ARG': - CB_CG_CD_CE = [np.where(sidechain_label=='CB')[0][0], - np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD')[0][0], - np.where(sidechain_label=='NE')[0][0]] - CG_CD_CE_CZ = [np.where(sidechain_label=='CG')[0][0], - np.where(sidechain_label=='CD')[0][0], - np.where(sidechain_label=='NE')[0][0], - np.where(sidechain_label=='CZ')[0][0]] - CD_NE_CZ_NH1 = [np.where(sidechain_label=='CD')[0][0], - np.where(sidechain_label=='NE')[0][0], - np.where(sidechain_label=='CZ')[0][0], - np.where(sidechain_label=='NH1')[0][0]] - ori_chi5 = calc_torsion_angles(template[CD_NE_CZ_NH1, :])[0] - + # common atom template index + N_idx = 0 + CA_idx = 1 + CBs_idx = 4 #CB, CB1. CB2 + CGs_idx = 5 #OG, CG, CG1, OG1, SG + CDs_idx = 6 #SD, CD, CD1, ND1, OD1, CS, P + CEs_idx = 7 #CE, NE, CE1, CE2, OE11, OE1, NE1 + CZs_idx = 8 #CZ, NZ + NH_idx = 9 #NH1, CH + N_CA_CB_CG = [N_idx, CA_idx, CBs_idx, CGs_idx] + CA_CB_CG_CD = [CA_idx, CBs_idx, CGs_idx, CDs_idx] + CB_CG_CD_CE = [CBs_idx, CGs_idx, CDs_idx, CEs_idx] + CG_CD_CE_CZ = [CGs_idx, CDs_idx, CEs_idx, CZs_idx] + CD_CE_CZ_NH = [CDs_idx, CEs_idx, CZs_idx, NH_idx] + # place CB at origin for rotation, exclude bb atoms and HA - CB = template[np.where(sidechain_label=='CB')[0][0]] + ori_chi1 = calc_torsion_angles(template[N_CA_CB_CG, :])[0] # The original chi1 torsion angle value + CB = template[4] unit_vector = CB/np.linalg.norm(CB) - # HA = np.where(sidechain_label=='HA')[0][0] - # chi1_idx = np.delete(sidechain_idx, np.where(sidechain_idx==HA)[0][0]) - HA = np.where(np.isin(sidechain_label, ['HA']))[0] + HA = np.where(np.isin(sidechain_label, ['HA','CB2','HB21','HB22','HB23']))[0] #MGN chi1_idx = np.delete(sidechain_idx, np.where(np.isin(sidechain_idx, HA))[0]) template[chi1_idx, :] = rotate_tor(ori_chi1, tors[0], unit_vector, template[chi1_idx, :], CB) @@ -193,51 +112,103 @@ def rotate_sidechain(res_type, tors): ori_chi2 = calc_torsion_angles(template[CA_CB_CG_CD, :])[0] CGs = template[5] - assert sidechain_label[5] in ['CG','CG1'] unit_vector = (CGs - CB)/np.linalg.norm(CGs - CB) - HBs = np.where(np.isin(sidechain_label, ['HB','HB2','HB3','CB','CG2','HG21','HG22','HG23']))[0] + HBs = np.where(np.isin(sidechain_label, ['HB','HB2','HB3','CB','CG2','HG21','HG22','HG23','CB1','HB12','HB13','OB']))[0] #MGN BHD chi2_idx = np.delete(chi1_idx, np.where(np.isin(chi1_idx, HBs))[0]) template[chi2_idx, :] = rotate_tor(ori_chi2, tors[1], unit_vector, template[chi2_idx, :], CGs) + #print('rotated chi2:', np.degrees(calc_torsion_angles(template[CA_CB_CG_CD, :])[0])) if len(tors) == 2: return template, sidechain_idx + # special ptm case 2+2 + if res_type in ['Y1P', 'PTR']: + # chi3 + CE_CZ_OH_P = [9, 10, 11, 12] + ori_chi3 = calc_torsion_angles(template[CE_CZ_OH_P, :])[0] + unit_vector = (template[11] - template[10])/np.linalg.norm(template[11] - template[10]) + chi3_idx = [12, 13, 14, 15] + if res_type == 'Y1P': chi3_idx += [-1] + template[chi3_idx, :] = rotate_tor(ori_chi3, tors[2], unit_vector, + template[chi3_idx, :], template[11]) + if len(tors) == 3: return template, sidechain_idx + # chi4 + CZ_OH_P_OP = [10, 11, 12, 13] + ori_chi4 = calc_torsion_angles(template[CZ_OH_P_OP, :])[0] + unit_vector = (template[12] - template[11])/np.linalg.norm(template[12] - template[11]) + chi4_idx = [13, 14, 15] + if res_type == 'Y1P': chi4_idx += [-1] + template[chi4_idx, :] = rotate_tor(ori_chi4, tors[3], unit_vector, + template[chi4_idx, :], template[12]) + return template, sidechain_idx + + if res_type in ['H1D', 'H2D']: + # chi3 + CG_ND_P_OP = [5, 7, 10, 11] + ori_chi3 = calc_torsion_angles(template[CG_ND_P_OP, :])[0] + unit_vector = (template[10] - template[7])/np.linalg.norm(template[10] - template[7]) + chi3_idx = [11, 12, 13] + if res_type == 'H1D': chi3_idx += [-1] + template[chi3_idx, :] = rotate_tor(ori_chi3, tors[2], unit_vector, + template[chi3_idx, :], template[10]) + return template, sidechain_idx + + if res_type in ['H1E', 'H2E']: + # chi3 + CD_NE_P_OP = [6, 9, 10, 11] + ori_chi3 = calc_torsion_angles(template[CD_NE_P_OP, :])[0] + unit_vector = (template[10] - template[9])/np.linalg.norm(template[10] - template[9]) + chi3_idx = [11, 12, 13] + if res_type == 'H1D': chi3_idx += [-1] + template[chi3_idx, :] = rotate_tor(ori_chi3, tors[2], unit_vector, + template[chi3_idx, :], template[10]) + return template, sidechain_idx + ori_chi3 = calc_torsion_angles(template[CB_CG_CD_CE, :])[0] CDs = template[6] - assert sidechain_label[6] in ['CD','SD'] unit_vector = (CDs - CGs)/np.linalg.norm(CDs - CGs) - HGs = np.where(np.isin(sidechain_label, ['HG','HG2','HG3','CG','CG1']))[0] - chi3_idx = np.delete(chi2_idx, np.where(np.isin(chi2_idx, HGs))[0]) + HGs = ['HG','HG2','HG3','CG','CG1'] + if res_type == 'MEN': HGs += ['OD1'] + elif res_type == 'CGU': HGs += ['OE21','OE22','CD2'] + elif res_type in ['SEP', 'S1P']: HGs += ['OG'] + elif res_type in ['TPO', 'T1P']: HGs += ['OG1'] + HGs_idx = np.where(np.isin(sidechain_label, HGs))[0] + chi3_idx = np.delete(chi2_idx, np.where(np.isin(chi2_idx, HGs_idx))[0]) template[chi3_idx, :] = rotate_tor(ori_chi3, tors[2], unit_vector, template[chi3_idx, :], CDs) + #print('rotated chi3:', np.degrees(calc_torsion_angles(template[CB_CG_CD_CE, :])[0])) if len(tors) == 3: - #print('rotated chi3:', np.degrees(calc_torsion_angles(template[CB_CG_CD_CE, :])[0])) return template, sidechain_idx - + ori_chi4 = calc_torsion_angles(template[CG_CD_CE_CZ, :])[0] CEs = template[7] - assert sidechain_label[7] in ['NE','CE'] unit_vector = (CEs - CDs)/np.linalg.norm(CEs - CDs) - HDs = np.where(np.isin(sidechain_label, ['HD','HD2','HD3','CD','SD']))[0] - chi4_idx = np.delete(chi3_idx, np.where(np.isin(chi3_idx, HDs))[0]) + HDs = ['HD','HD2','HD3','CD','SD'] + if res_type == 'LYZ': HDs += ['OH','HH'] + elif res_type == 'AGM': HDs += ['CE2','HE21','HE22','HE23'] + HDs_idx = np.where(np.isin(sidechain_label, HDs))[0] + chi4_idx = np.delete(chi3_idx, np.where(np.isin(chi3_idx, HDs_idx))[0]) template[chi4_idx, :] = rotate_tor(ori_chi4, tors[3], unit_vector, template[chi4_idx, :], CEs) + #print('rotated chi4:', np.degrees(calc_torsion_angles(template[CG_CD_CE_CZ, :])[0])) if len(tors) == 4: return template, sidechain_idx + ori_chi5 = calc_torsion_angles(template[CD_CE_CZ_NH, :])[0] CZ = template[8] unit_vector = (CZ - CEs)/np.linalg.norm(CZ - CEs) - assert sidechain_label[19] == 'HE' - chi5_idx = np.delete(chi4_idx, np.where(np.isin(chi4_idx, [19, 7]))[0]) + HEs = np.where(np.isin(sidechain_label, ['HE','HE1','HE2','HE3','CD']))[0] #AGM ALY + chi5_idx = np.delete(chi4_idx, np.where(np.isin(chi4_idx, HEs))[0]) template[chi5_idx, :] = rotate_tor(ori_chi5, tors[4], unit_vector, template[chi5_idx, :], CZ) - + #print('rotated chi5:', np.degrees(calc_torsion_angles(template[CD_CE_CZ_NH, :])[0])) + return template, sidechain_idx if __name__ == "__main__": #ARG - template_coord, _ = rotate_sidechain("GLN", np.array([123, 35, -162])) - arg_temp = sidechain_templates["GLN"][0] + template_coord, _ = rotate_sidechain("H1D", np.array([-123, 173, 85])) + arg_temp = sidechain_templates["H1D"][0] arg_temp.coords = template_coord - arg_temp.write_PDB("rotated_GLN.pdb") + arg_temp.write_PDB("rotated_H1D.pdb") diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index e2d9a21..fec0b5a 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -40,7 +40,7 @@ class Structure: """ Hold structural data from PDB/mmCIF files. - Run the ``.buil()`` method to read the structure. + Run the ``.build()`` method to read the structure. Cases for PDB Files: * If there are several MODELS only the first model is considered. @@ -565,6 +565,9 @@ def parse_fasta_to_array(datastr, **kwargs): """ n_residues = len(datastr) residues_aa3 = translate_seq_to_3l(datastr, histidine_protonation='HIP') + # TODO: test ptm, comment out + #if 'HIP' in residues_aa3: residues_aa3[residues_aa3.index('HIP')] = 'H2D' + # For each residue we have N, CA, C, O, HN, then there are two added HN on N-terminal and one OXT on C terminal # Also prolines do not have HN data_array = gen_empty_structure_data_array(5 * n_residues + 3 - datastr.count('P')) diff --git a/src/mcsce/log.csv b/src/mcsce/log.csv new file mode 100644 index 0000000..78ebddc --- /dev/null +++ b/src/mcsce/log.csv @@ -0,0 +1,2 @@ +PDB name,Succeeded,Time used +bb_template.pdb,1,0:00:05.260849 diff --git a/src/mcsce/mcsce_sidechain.py b/src/mcsce/mcsce_sidechain.py index 6084c78..fa422a6 100644 --- a/src/mcsce/mcsce_sidechain.py +++ b/src/mcsce/mcsce_sidechain.py @@ -9,9 +9,10 @@ import numpy as np from mcsce.core import build_definitions +from mcsce.core.definitions import aa1to3 from mcsce.libs.libenergy import prepare_energy_function from mcsce.libs.libstructure import Structure -from mcsce.core.side_chain_builder import create_side_chain +from mcsce.core.side_chain_builder import initialize_func_calc, create_side_chain def mcsce_sidechain(input_seq, coords, n_trials=200, efunc_terms=["lj", "clash"], temperature=300, parallel_worker=16, mode="simple"): """ @@ -25,8 +26,8 @@ def mcsce_sidechain(input_seq, coords, n_trials=200, efunc_terms=["lj", "clash"] input_seq: str A FASTA string to specify the amino acid sequence - coords: np.array with shape (4L, 3) - L is the total length of the sequence, and the coordinates are in the order of N, CA, C, O + coords: np.array with shape (5L+3, 3) + L is the total length of the sequence, and the coordinates are in the order of N, CA, C, O, HN plus terminal atoms n_trials: int The total number of trials for the generation procedure @@ -52,9 +53,10 @@ def mcsce_sidechain(input_seq, coords, n_trials=200, efunc_terms=["lj", "clash"] s = Structure(fasta=input_seq) s.build() s.coords = coords - + + # handles cap ptms ff = build_definitions.forcefields["Amberff14SB"] - ff_obj = ff(add_OXT=True, add_Nterminal_H=True) + ff_obj = ff(Cterminal='OXT', Nterminal='HN') if mode == "simple": return_first_valid = True @@ -62,11 +64,14 @@ def mcsce_sidechain(input_seq, coords, n_trials=200, efunc_terms=["lj", "clash"] return_first_valid = False else: raise RuntimeError("Mode has to be either simple or exhaustive.") - - final_structure = create_side_chain(s, n_trials, partial(prepare_energy_function, forcefield=ff_obj, terms=efunc_terms), temperature=temperature, parallel_worker=parallel_worker, return_first_valid=return_first_valid) + + initialize_func_calc(partial(prepare_energy_function, batch_size=16, + forcefield=ff_obj, terms=efunc_terms), structure=s, aa_seq=['H2D' if r=='H' else aa1to3[r] for r in input_seq]) + final_structure = create_side_chain(s, n_trials, temperature=temperature, parallel_worker=parallel_worker, + return_first_valid=return_first_valid) if final_structure is not None: - # final_structure.write_PDB("final.pdb") + final_structure.write_PDB("test.pdb") return final_structure.coords else: return None @@ -74,310 +79,41 @@ def mcsce_sidechain(input_seq, coords, n_trials=200, efunc_terms=["lj", "clash"] # Tests if __name__ == "__main__": - fasta = "MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG" - backbone_coords = np.array([[27.34 , 24.43 , 2.614], - [26.266, 25.413, 2.842], - [26.913, 26.639, 3.531], - [27.886, 26.463, 4.263], - [26.335, 27.77 , 3.258], - [26.85 , 29.021, 3.898], - [26.1 , 29.253, 5.202], - [24.865, 29.024, 5.33 ], - [26.849, 29.656, 6.217], - [26.235, 30.058, 7.497], - [26.882, 31.428, 7.862], - [27.906, 31.711, 7.264], - [26.214, 32.097, 8.771], - [26.772, 33.436, 9.197], - [27.151, 33.362, 10.65 ], - [26.35 , 32.778, 11.395], - [28.26 , 33.943, 11.096], - [28.605, 33.965, 12.503], - [28.638, 35.461, 12.9 ], - [29.522, 36.103, 12.32 ], - [27.751, 35.867, 13.74 ], - [27.691, 37.315, 14.143], - [28.469, 37.475, 15.42 ], - [28.213, 36.753, 16.411], - [29.426, 38.43 , 15.446], - [30.225, 38.643, 16.662], - [29.664, 39.839, 17.434], - [28.85 , 40.565, 16.859], - [30.132, 40.069, 18.642], - [29.607, 41.18 , 19.467], - [30.075, 42.538, 18.984], - [29.586, 43.57 , 19.483], - [30.991, 42.571, 17.998], - [31.422, 43.94 , 17.553], - [30.755, 44.351, 16.277], - [31.207, 45.268, 15.566], - [29.721, 43.673, 15.885], - [28.978, 43.96 , 14.678], - [29.604, 43.507, 13.393], - [29.219, 43.981, 12.301], - [30.563, 42.623, 13.495], - [31.191, 42.012, 12.331], - [30.459, 40.666, 12.13 ], - [30.253, 39.991, 13.133], - [30.163, 40.338, 10.886], - [29.542, 39.02 , 10.653], - [30.494, 38.261, 9.729], - [30.849, 38.85 , 8.706], - [30.795, 37.015, 10.095], - [31.72 , 36.289, 9.176], - [30.955, 35.211, 8.459], - [30.025, 34.618, 9.04 ], - [31.244, 34.986, 7.197], - [30.505, 33.884, 6.512], - [31.409, 32.68 , 6.446], - [32.619, 32.812, 6.125], - [30.884, 31.485, 6.666], - [31.677, 30.275, 6.639], - [31.022, 29.288, 5.665], - [29.809, 29.395, 5.545], - [31.834, 28.412, 5.125], - [31.22 , 27.341, 4.275], - [31.44 , 26.079, 5.08 ], - [32.576, 25.802, 5.461], - [30.31 , 25.458, 5.384], - [30.288, 24.245, 6.193], - [29.279, 23.227, 5.641], - [28.478, 23.522, 4.725], - [29.38 , 22.057, 6.232], - [28.468, 20.94 , 5.98 ], - [27.819, 20.609, 7.316], - [28.449, 20.674, 8.36 ], - [26.559, 20.22 , 7.288], - [25.829, 19.825, 8.494], - [26.541, 18.732, 9.251], - [26.333, 18.536, 10.457], - [27.361, 17.959, 8.559], - [28.054, 16.835, 9.21 ], - [29.258, 17.318, 9.984], - [29.93 , 16.477, 10.606], - [29.599, 18.599, 9.828], - [30.796, 19.083, 10.566], - [30.491, 19.162, 12.04 ], - [29.367, 19.523, 12.441], - [31.51 , 18.936, 12.852], - [31.398, 19.064, 14.286], - [31.593, 20.553, 14.655], - [32.159, 21.311, 13.861], - [31.113, 20.863, 15.86 ], - [31.288, 22.201, 16.417], - [32.776, 22.519, 16.577], - [33.233, 23.659, 16.384], - [33.548, 21.526, 16.95 ], - [35.031, 21.722, 17.069], - [35.615, 22.19 , 15.759], - [36.532, 23.046, 15.724], - [35.139, 21.624, 14.662], - [35.59 , 21.945, 13.302], - [35.238, 23.382, 12.92 ], - [36.066, 24.109, 12.333], - [34.007, 23.745, 13.25 ], - [33.533, 25.097, 12.978], - [34.441, 26.099, 13.684], - [34.883, 27.09 , 13.093], - [34.734, 25.822, 14.949], - [35.596, 26.715, 15.736], - [36.975, 26.826, 15.107], - [37.579, 27.926, 15.159], - [37.499, 25.743, 14.571], - [38.794, 25.761, 13.88 ], - [38.728, 26.591, 12.611], - [39.704, 27.346, 12.277], - [37.633, 26.543, 11.867], - [37.471, 27.391, 10.668], - [37.441, 28.882, 11.052], - [38.02 , 29.772, 10.382], - [36.811, 29.17 , 12.192], - [36.731, 30.57 , 12.645], - [38.148, 30.981, 13.069], - [38.544, 32.15 , 12.856], - [38.883, 30.11 , 13.713], - [40.269, 30.508, 14.115], - [41.092, 30.808, 12.851], - [41.828, 31.808, 12.681], - [41.001, 29.878, 11.931], - [41.718, 30.022, 10.643], - [41.399, 31.338, 9.967], - [42.26 , 32.036, 9.381], - [40.117, 31.75 , 9.988], - [39.808, 32.994, 9.233], - [39.837, 34.271, 9.995], - [40.164, 35.323, 9.345], - [39.655, 34.335, 11.285], - [39.676, 35.547, 12.072], - [40.675, 35.527, 13.2 ], - [40.814, 36.528, 13.911], - [41.317, 34.393, 13.432], - [42.345, 34.269, 14.431], - [41.949, 34.076, 15.842], - [42.829, 34. , 16.739], - [40.642, 33.916, 16.112], - [40.226, 33.716, 17.509], - [40.449, 32.278, 17.945], - [39.936, 31.336, 17.315], - [41.189, 32.085, 19.031], - [41.461, 30.751, 19.594], - [40.168, 30.026, 19.918], - [39.264, 30.662, 20.521], - [40.059, 28.758, 19.607], - [38.817, 28.02 , 19.889], - [38.421, 28.048, 21.341], - [37.213, 28.036, 21.704], - [39.374, 28.09 , 22.24 ], - [39.063, 28.063, 23.695], - [38.365, 29.335, 24.159], - [37.684, 29.39 , 25.221], - [38.419, 30.373, 23.341], - [37.738, 31.637, 23.712], - [36.334, 31.742, 23.087], - [35.574, 32.618, 23.483], - [36. , 30.86 , 22.172], - [34.738, 30.875, 21.473], - [33.589, 30.189, 22.181], - [33.58 , 29.009, 22.499], - [32.478, 30.917, 22.269], - [31.2 , 30.329, 22.78 ], - [30.21 , 30.509, 21.65 ], - [29.978, 31.726, 21.269], - [29.694, 29.436, 21.054], - [28.762, 29.573, 19.906], - [27.331, 29.317, 20.364], - [27.101, 28.346, 21.097], - [26.436, 30.232, 20.004], - [25.034, 30.17 , 20.401], - [24.101, 30.149, 19.196], - [24.196, 30.948, 18.287], - [23.141, 29.187, 19.241], - [22.126, 29.062, 18.183], - [20.835, 28.629, 18.904], - [20.821, 27.734, 19.749], - [19.81 , 29.378, 18.578], - [18.443, 29.143, 19.083], - [18.453, 28.941, 20.591], - [17.86 , 27.994, 21.128], - [19.172, 29.808, 21.243], - [19.399, 29.894, 22.655], - [20.083, 28.729, 23.321], - [19.991, 28.584, 24.561], - [20.801, 27.931, 22.578], - [21.55 , 26.796, 23.133], - [23.046, 27.087, 22.913], - [23.383, 27.627, 21.87 ], - [23.88 , 26.727, 23.851], - [25.349, 26.872, 23.643], - [25.743, 25.586, 22.922], - [25.325, 24.489, 23.378], - [26.465, 25.689, 21.833], - [26.826, 24.521, 21.012], - [27.994, 23.781, 21.643], - [28.904, 24.444, 22.098], - [27.942, 22.448, 21.648], - [29.015, 21.657, 22.288], - [29.942, 21.106, 21.24 ], - [29.47 , 20.677, 20.19 ], - [31.233, 21.09 , 21.459], - [32.262, 20.67 , 20.514], - [32.128, 19.364, 19.75 ], - [32.546, 19.317, 18.558], - [31.697, 18.311, 20.406], - [31.568, 16.962, 19.825], - [30.32 , 16.698, 19.051], - [30.198, 15.657, 18.366], - [29.34 , 17.594, 19.076], - [28.108, 17.439, 18.276], - [28.375, 17.999, 16.887], - [29.326, 18.786, 16.69 ], - [27.51 , 17.689, 15.954], - [27.574, 18.192, 14.563], - [26.482, 19.28 , 14.432], - [25.609, 19.388, 15.287], - [26.585, 20.063, 13.378], - [25.594, 21.109, 13.072], - [24.241, 20.436, 12.857], - [23.264, 20.951, 13.329], - [24.24 , 19.233, 12.246], - [22.924, 18.583, 12.025], - [22.229, 18.244, 13.325], - [20.963, 18.253, 13.395], - [22.997, 17.978, 14.366], - [22.418, 17.638, 15.693], - [21.46 , 18.737, 16.163], - [20.497, 18.506, 16.9 ], - [21.846, 19.954, 15.905], - [21.079, 21.149, 16.251], - [20.142, 21.59 , 15.149], - [19.499, 22.645, 15.321], - [19.993, 20.884, 14.049], - [19.065, 21.352, 12.999], - [19.442, 22.745, 12.51 ], - [18.571, 23.61 , 12.289], - [20.717, 22.964, 12.26 ], - [21.184, 24.263, 11.69 ], - [21.11 , 24.111, 10.173], - [21.841, 23.198, 9.686], - [20.291, 24.875, 9.507], - [20.081, 24.773, 8.033], - [20.822, 25.914, 7.332], - [21.323, 26.83 , 8.008], - [20.924, 25.862, 6.006], - [21.656, 26.847, 5.24 ], - [21.127, 28.24 , 5.574], - [19.958, 28.465, 5.842], - [22.099, 29.163, 5.605], - [21.907, 30.563, 5.881], - [21.466, 30.953, 7.261], - [21.066, 32.112, 7.533], - [21.674, 30.034, 8.191], - [21.419, 30.253, 9.62 ], - [22.504, 31.228, 10.136], - [23.579, 31.321, 9.554], - [22.241, 31.873, 11.241], - [23.212, 32.762, 11.891], - [23.509, 32.224, 13.29 ], - [22.544, 31.942, 14.034], - [24.79 , 32.021, 13.618], - [25.149, 31.609, 14.98 ], - [25.698, 32.876, 15.669], - [26.158, 33.73 , 14.894], - [25.621, 32.945, 16.95 ], - [26.179, 34.127, 17.65 ], - [27.475, 33.651, 18.304], - [27.507, 32.587, 18.958], - [28.525, 34.447, 18.189], - [29.801, 34.145, 18.829], - [30.052, 35.042, 20.004], - [30.105, 36.305, 19.788], - [30.124, 34.533, 21.191], - [30.479, 35.369, 22.374], - [31.901, 34.91 , 22.728], - [32.19 , 33.696, 22.635], - [32.763, 35.831, 23.09 ], - [34.145, 35.472, 23.481], - [34.239, 35.353, 24.979], - [33.707, 36.197, 25.728], - [34.93 , 34.384, 25.451], - [35.161, 34.174, 26.896], - [36.671, 34.296, 27.089], - [37.305, 33.233, 26.795], - [37.197, 35.397, 27.513], - [38.668, 35.502, 27.68 ], - [39.076, 34.931, 29.031], - [38.297, 34.946, 29.996], - [40.294, 34.412, 29.045], - [40.873, 33.802, 30.253], - [41.765, 34.829, 30.944], - [42.945, 34.994, 30.583], - [41.165, 35.531, 31.898], - [41.845, 36.55 , 32.686], - [41.251, 37.941, 32.588], - [41.102, 38.523, 31.5 ], - [40.946, 38.472, 33.757], - [40.373, 39.813, 33.944], - [40.031, 39.992, 35.432], - [38.933, 40.525, 35.687]]) - result = mcsce_sidechain(fasta, backbone_coords, n_trials=25, efunc_terms=["lj", "clash"]) - print(result) \ No newline at end of file + fasta = "MQCKHD" + backbone_coords = np.array([[0.0, 0.0, 0.001], + [1.442, 0.0, 0.001], + [2.218, 1.281, 0.001], + [3.374, 1.3, 0.418], + [-0.337, -0.952, 0.001], + [-0.337, 0.476, 0.826], + [-0.337, 0.476, -0.824], + [1.466, 2.321, -0.503], + [2.103, 3.61, -0.622], + [1.541, 4.826, 0.048], + [0.512, 5.344, -0.381], + [0.506, 2.15, -0.768], + [2.304, 5.227, 1.124], + [1.864, 6.453, 1.742], + [2.722, 7.178, 2.733], + [2.528, 7.031, 3.938], + [3.099, 4.682, 1.427], + [3.647, 7.939, 2.049], + [4.573, 8.665, 2.882], + [4.213, 9.916, 3.625], + [4.113, 10.982, 3.021], + [3.646, 7.962, 1.039], + [4.057, 9.572, 4.951], + [3.61, 10.631, 5.822], + [2.271, 11.29, 5.698], + [2.174, 12.388, 5.155], + [4.252, 8.629, 5.257], + [1.317, 10.472, 6.266], + [-0.042, 10.937, 6.134], + [-0.934, 10.212, 7.159], + [-2.113, 10.257, 6.941], [1.586, 9.617, 6.732], + [-0.503, 9.619, 8.139]]) + result = mcsce_sidechain(fasta, backbone_coords, n_trials=20, efunc_terms=["lj", "clash"]) + print(result) + + + diff --git a/src/mcsce/test_func.py b/src/mcsce/test_func.py new file mode 100644 index 0000000..1e47660 --- /dev/null +++ b/src/mcsce/test_func.py @@ -0,0 +1,9 @@ +import numpy as np +from mcsce.libs.libcalc import calc_angle, calc_torsion_angles, rotate_coordinates_Q +from mcsce.core.build_definitions import sidechain_templates +from mcsce.libs.libscbuild import rotate_sidechain + +template_coord, _ = rotate_sidechain("PTR", np.array([123, 135])) #, -67, 118, -5])) +tempdb = sidechain_templates["PTR"][0] +tempdb.coords = template_coord +tempdb.write_PDB("rotated_PTR.pdb") From d5ad0448ba837ff979376c4c2e5ac26f2a27dd06 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sun, 2 Oct 2022 05:00:47 -0700 Subject: [PATCH 02/22] add phosphorated residues --- src/mcsce/log.csv | 2 -- src/mcsce/test_func.py | 9 --------- 2 files changed, 11 deletions(-) delete mode 100644 src/mcsce/log.csv delete mode 100644 src/mcsce/test_func.py diff --git a/src/mcsce/log.csv b/src/mcsce/log.csv deleted file mode 100644 index 78ebddc..0000000 --- a/src/mcsce/log.csv +++ /dev/null @@ -1,2 +0,0 @@ -PDB name,Succeeded,Time used -bb_template.pdb,1,0:00:05.260849 diff --git a/src/mcsce/test_func.py b/src/mcsce/test_func.py deleted file mode 100644 index 1e47660..0000000 --- a/src/mcsce/test_func.py +++ /dev/null @@ -1,9 +0,0 @@ -import numpy as np -from mcsce.libs.libcalc import calc_angle, calc_torsion_angles, rotate_coordinates_Q -from mcsce.core.build_definitions import sidechain_templates -from mcsce.libs.libscbuild import rotate_sidechain - -template_coord, _ = rotate_sidechain("PTR", np.array([123, 135])) #, -67, 118, -5])) -tempdb = sidechain_templates["PTR"][0] -tempdb.coords = template_coord -tempdb.write_PDB("rotated_PTR.pdb") From 7d14211d61f4a8f783d7229f6b1586ee86bdedc2 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sun, 2 Oct 2022 05:18:13 -0700 Subject: [PATCH 03/22] clean files & update readme --- .gitignore | 2 ++ README.rst | 12 +++++++++++- src/mcsce/core/side_chain_builder.py | 2 +- .../sidechain_templates/ptm_templates/.DS_Store | Bin 6148 -> 0 bytes 4 files changed, 14 insertions(+), 2 deletions(-) delete mode 100644 src/mcsce/core/sidechain_templates/ptm_templates/.DS_Store diff --git a/.gitignore b/.gitignore index 3e42cde..36d5ae6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.py[cod] *$py.class +.DS_Store # C extensions *.so @@ -115,5 +116,6 @@ SimpleOpt1-5/ # in development files src/mcsce/*.pdb +src/mcsce/test_func.py src/mcsce/core/data/glycam_06j.xml src/mcsce/core/sidechain_templates/cap_templates/ diff --git a/README.rst b/README.rst index 51e5874..831e5ea 100644 --- a/README.rst +++ b/README.rst @@ -20,7 +20,17 @@ MCSCE - Sidechain packing library Monte Carlo Side Chain Entropy package for generating side chain packing for fixed protein backbone. -v0.0.0 +Updated supports for phosphorated residues; other ptms in development. + +Phosphoralytion +| SER | SEP S1P | +| THR | TPO T1P | +| TYR | PTR Y1P | +| HID | H1D H2D | +| HIE | H1E H2E | + + +v0.1.0 References ========== diff --git a/src/mcsce/core/side_chain_builder.py b/src/mcsce/core/side_chain_builder.py index 24b5d54..1b1e725 100644 --- a/src/mcsce/core/side_chain_builder.py +++ b/src/mcsce/core/side_chain_builder.py @@ -166,7 +166,7 @@ def create_side_chain_structure(inputs): energies = energy_func(all_coords[:, -n_sidechain_atoms:], all_coords[:, : -n_sidechain_atoms]) minimum_energy = min(energies) # Keep track of the minimum energy so that the renormalized energies can be converted back - print(idx, resname, len(candidate_probs), np.isinf(energies).sum()) + #print(idx, resname, len(candidate_probs), np.isinf(energies).sum()) # If all energies are inf, end this growth if np.isinf(energies).all(): return None, False, None, None diff --git a/src/mcsce/core/sidechain_templates/ptm_templates/.DS_Store b/src/mcsce/core/sidechain_templates/ptm_templates/.DS_Store deleted file mode 100644 index f5ace363dc42a2013f33bd93dcac2d7a93c1bdcf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHK%}&BV5S~F22_ivHUQGG~B)&loM{63=vgU0A zrhqB%+Z2#@w?`A2(gSVq?tZ_&DJ8f0Vw&WO1U~pc@C$JGp{CR5gEgTQCA6StaA#m| z_1&*?-@$k-agV{BBcpsNYTk5>Z2gk=8D)r{BA3hLs+`jks&0(HlCNCrc_qS9W96tJ z7^5|-#OT6%jS42{m(6*VQ=VrZo*6j85X=xGZRYZ2MWZHeXuh&Ghw=t$rndv!HB??T z*DNcwr*pOD4Q3qR>U_&C_D2kMbc{#d zFZNhCbaY~W_+X!z{RxFNJK{&`P8>V5-V`te_7ymCKL@h^--OTq`y~5i3YY?aN&#+< z@5Uoc$?vVT$;n Date: Sun, 2 Oct 2022 05:24:26 -0700 Subject: [PATCH 04/22] update readme --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index 831e5ea..7a29abb 100644 --- a/README.rst +++ b/README.rst @@ -23,6 +23,7 @@ fixed protein backbone. Updated supports for phosphorated residues; other ptms in development. Phosphoralytion +|:--- |:------- | | SER | SEP S1P | | THR | TPO T1P | | TYR | PTR Y1P | From c8d676367810c720231c477dc9a58d47adb4d9a8 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sun, 2 Oct 2022 05:27:05 -0700 Subject: [PATCH 05/22] update readme --- README.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 7a29abb..3ad50da 100644 --- a/README.rst +++ b/README.rst @@ -23,12 +23,11 @@ fixed protein backbone. Updated supports for phosphorated residues; other ptms in development. Phosphoralytion -|:--- |:------- | -| SER | SEP S1P | -| THR | TPO T1P | -| TYR | PTR Y1P | -| HID | H1D H2D | -| HIE | H1E H2E | +- SER | SEP S1P +- THR | TPO T1P +- TYR | PTR Y1P +- HID | H1D H2D +- HIE | H1E H2E v0.1.0 From 9dd8e781c1cce066d0e8d895156e698de77ea26a Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sun, 2 Oct 2022 05:28:42 -0700 Subject: [PATCH 06/22] update readme --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 3ad50da..2364509 100644 --- a/README.rst +++ b/README.rst @@ -23,11 +23,11 @@ fixed protein backbone. Updated supports for phosphorated residues; other ptms in development. Phosphoralytion -- SER | SEP S1P -- THR | TPO T1P -- TYR | PTR Y1P -- HID | H1D H2D -- HIE | H1E H2E +* SER | SEP S1P +* THR | TPO T1P +* TYR | PTR Y1P +* HID | H1D H2D +* HIE | H1E H2E v0.1.0 From 514406ec7693f6e90ced82c25768f52240c8c673 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Mon, 3 Oct 2022 16:58:08 -0700 Subject: [PATCH 07/22] include coulomb energy --- .gitignore | 1 + README.rst | 22 +++++++++++++++------- src/mcsce/cli.py | 4 ++-- src/mcsce/core/data/ptm_rotamer.lib | 10 ++++++++++ src/mcsce/core/definitions.py | 2 +- src/mcsce/core/rotamer_library.py | 4 ++-- src/mcsce/libs/libenergy.py | 5 ++--- src/mcsce/libs/libstructure.py | 4 ++-- 8 files changed, 35 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 36d5ae6..7ed0cac 100644 --- a/.gitignore +++ b/.gitignore @@ -116,6 +116,7 @@ SimpleOpt1-5/ # in development files src/mcsce/*.pdb +src/mcsce/*_mcsce/ src/mcsce/test_func.py src/mcsce/core/data/glycam_06j.xml src/mcsce/core/sidechain_templates/cap_templates/ diff --git a/README.rst b/README.rst index 2364509..028593a 100644 --- a/README.rst +++ b/README.rst @@ -20,15 +20,23 @@ MCSCE - Sidechain packing library Monte Carlo Side Chain Entropy package for generating side chain packing for fixed protein backbone. -Updated supports for phosphorated residues; other ptms in development. +Updated supports for phosphorated residues and other listed modifications; other ptms in development. -Phosphoralytion -* SER | SEP S1P -* THR | TPO T1P -* TYR | PTR Y1P -* HID | H1D H2D -* HIE | H1E H2E +#### Phosphoralytion(unprotonated, protonated) +- SER: SEP S1P +- THR: TPO T1P +- TYR: PTR Y1P +- HID: H2D H1D +- HIE: H2E H1E +#### Methylation +- LYS: M3L + +#### N6-carboxylysine +- LYS: KCX + +#### Hydroxylation +- PRO: HYP v0.1.0 diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index f3b6b6d..a467fdd 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -92,7 +92,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz s.build() s = s.remove_side_chains() initialize_func_calc(partial(prepare_energy_function, batch_size=batch_size, - forcefield=ff_obj, terms=["lj", "clash"]), + forcefield=ff_obj, terms=["lj", "clash", "coulomb"]), structure=s) if mode == "simple" and same_structure and n_worker > 1: # parallel executing sequential trials on the same structure (different conformations) @@ -139,7 +139,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz if not same_structure: initialize_func_calc(partial(prepare_energy_function, batch_size=batch_size, - forcefield=ff_obj, terms=["lj", "clash"]), structure=s) + forcefield=ff_obj, terms=["lj", "clash", "coulomb"]), structure=s) if mode == "ensemble": conformations, success_indicator = create_side_chain_ensemble( diff --git a/src/mcsce/core/data/ptm_rotamer.lib b/src/mcsce/core/data/ptm_rotamer.lib index 6a205d5..8448450 100644 --- a/src/mcsce/core/data/ptm_rotamer.lib +++ b/src/mcsce/core/data/ptm_rotamer.lib @@ -49,6 +49,16 @@ PTR 240 360 0.44 -112.6 180 25.84 10 PTR 240 360 0.22 8.698 180 33.41 10 PTR 240 360 0.34 96.21 180 18.23 10 +# H2D +# Number of chi angles: 3 +# Number of additional chi angles: 1 +# Dependence: chi 2 +# +#AA Chi1-range Prob chi3Val chi3Sig +H2D 0 120 1 180 10 +H2D 120 240 1 180 10 +H2D 240 360 1 180 10 + # ABA # Number of chi angles: 1 # Number of additional chi angles: 1 diff --git a/src/mcsce/core/definitions.py b/src/mcsce/core/definitions.py index 7643831..7cbed5c 100644 --- a/src/mcsce/core/definitions.py +++ b/src/mcsce/core/definitions.py @@ -35,7 +35,7 @@ 'HYP':'PRO', 'SEP':'SER', 'TPO':'THR', 'S1P':'SER', 'T1P':'THR', 'Y1P':'TYR', 'PTR':'TYR', 'TYS':'TYR', 'LYZ':'LYS', 'ALY':'LYS', 'M3L':'LYS', 'KCX':'LYS' } -ptm_h = {'H1E':'H2E', 'H1D':'H2D', 'S1P':'SEP', 'T1P':'TPO', 'Y1P':'PTR'} +ptm_h = {'H1E':'H2D', 'H1D':'H2D', 'H2E':'H2D', 'S1P':'SEP', 'T1P':'TPO', 'Y1P':'PTR'} # Amino-acid 3 to 1 letter code dictionary aa3to1 = { 'ALA': 'A', diff --git a/src/mcsce/core/rotamer_library.py b/src/mcsce/core/rotamer_library.py index 64bd941..73e7b2b 100644 --- a/src/mcsce/core/rotamer_library.py +++ b/src/mcsce/core/rotamer_library.py @@ -129,10 +129,10 @@ def retrieve_torsion_and_prob(self, residue_type, phi, psi, ptmlib): if residue_type in ptm_aa: chis, probs = self._data[(ptm_aa[residue_type], get_closest_angle(phi), get_closest_angle(psi))] # phosphate protonation states - if residue_type in ['S1P', 'T1P', 'Y1P', 'H1D', 'H1E']: + if residue_type in ['S1P', 'T1P', 'Y1P', 'H1D', 'H1E', 'H2E']: residue_type = ptm_h[residue_type] if residue_type not in ptmlib._info: - print("ptm residue rotamers not provided, assumes unmodified residue") + #print("ptm residue rotamers not provided, assumes unmodified residue") return [chis, probs] # search ptm library diff --git a/src/mcsce/libs/libenergy.py b/src/mcsce/libs/libenergy.py index 2374b17..2524d40 100644 --- a/src/mcsce/libs/libenergy.py +++ b/src/mcsce/libs/libenergy.py @@ -92,7 +92,6 @@ def prepare_energy_function( bonds_equal_3_inter, )[order_in_upper_diagonal] - ''' bonds_1_mask = create_bonds_apart_mask_for_ij_pairs( residue_data, N, @@ -103,7 +102,7 @@ def prepare_energy_function( bonds_2_mask = (bonds_le_2_mask.astype(int) - bonds_1_mask.astype(int)).astype(bool) - ''' + # Convert 2-bonds separated mask 1d array into the upper triangle of the 2d connecitivity matrix # connectivity_matrix = np.zeros((N, N)) @@ -149,7 +148,7 @@ def prepare_energy_function( if clash_term: vdw_radii_sum = calc_vdw_radii_sum(atom_labels[new_indices], atom_labels[old_indices]) vdw_radii_sum *= 0.6 # The clash check parameter as defined in the SI of the MCSCE paper - vdw_radii_sum[bonds_le_2_mask] = 0 + vdw_radii_sum[bonds_1_mask] = 0 vdw_radii_sum = vdw_radii_sum[None] else: vdw_radii_sum = None diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index fec0b5a..6d3db2b 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -423,11 +423,11 @@ def check_backbone_atom_completeness(self): for idx in all_residue_atoms: if idx == n_term_idx: expected_atoms = ["N", "CA", "C", "O", "H1", "H2"] - if all_residue_atoms[idx]["label"] != "PRO": + if all_residue_atoms[idx]["label"] not in ["PRO", "HYP"]: expected_atoms.append("H3") else: expected_atoms = ["N", "CA", "C", "O"] - if all_residue_atoms[idx]["label"] != "PRO": + if all_residue_atoms[idx]["label"] not in ["PRO", "HYP"]: expected_atoms.append("H") if idx == c_term_idx: expected_atoms.append("OXT") From 3cae3efebb74d8e8b8329ecf7698b9d024d22fa5 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Mon, 3 Oct 2022 17:05:15 -0700 Subject: [PATCH 08/22] update readme --- README.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.rst b/README.rst index 028593a..cb315a7 100644 --- a/README.rst +++ b/README.rst @@ -23,6 +23,7 @@ fixed protein backbone. Updated supports for phosphorated residues and other listed modifications; other ptms in development. #### Phosphoralytion(unprotonated, protonated) + - SER: SEP S1P - THR: TPO T1P - TYR: PTR Y1P @@ -30,12 +31,15 @@ Updated supports for phosphorated residues and other listed modifications; other - HIE: H2E H1E #### Methylation + - LYS: M3L #### N6-carboxylysine + - LYS: KCX #### Hydroxylation + - PRO: HYP v0.1.0 From 9885e99e1b3fce0d475614c49caecfb6b40c77c9 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Mon, 3 Oct 2022 17:06:02 -0700 Subject: [PATCH 09/22] update readme --- README.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index cb315a7..9972ebd 100644 --- a/README.rst +++ b/README.rst @@ -22,7 +22,7 @@ fixed protein backbone. Updated supports for phosphorated residues and other listed modifications; other ptms in development. -#### Phosphoralytion(unprotonated, protonated) +Phosphoralytion(unprotonated, protonated) - SER: SEP S1P - THR: TPO T1P @@ -30,15 +30,15 @@ Updated supports for phosphorated residues and other listed modifications; other - HID: H2D H1D - HIE: H2E H1E -#### Methylation +Methylation - LYS: M3L -#### N6-carboxylysine +N6-carboxylysine - LYS: KCX -#### Hydroxylation +Hydroxylation - PRO: HYP From 36fe1abe879c65f691268484016f3ef0c76aec85 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Wed, 5 Oct 2022 15:32:48 -0700 Subject: [PATCH 10/22] correct angle bin --- src/mcsce/core/rotamer_library.py | 54 +++++++++++++++++++------------ src/mcsce/log.csv | 2 ++ 2 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 src/mcsce/log.csv diff --git a/src/mcsce/core/rotamer_library.py b/src/mcsce/core/rotamer_library.py index 73e7b2b..149b163 100644 --- a/src/mcsce/core/rotamer_library.py +++ b/src/mcsce/core/rotamer_library.py @@ -16,15 +16,31 @@ _library_path = _filepath.joinpath('data', 'SimpleOpt1-5', 'ALL.bbdep.rotamers.lib') _ptm_library_path = _filepath.joinpath('data', 'ptm_rotamer.lib') + +def wrap_angles(rot_angle): + more_mask = rot_angle > 180. + rot_angle[more_mask] -= 360. + less_mask = rot_angle < -180. + rot_angle[less_mask] += 360. + return rot_angle + def get_closest_angle(input_angle, degsep=10): """ - Find closest angle in [-180, +170] with 10 degree separations + Find closest angle in [-180, +170] with n degree separations """ - input_angle += 180 - if input_angle > 175+180: + if input_angle > 180 - degsep/2.: # the closest would be 180, namely -180 return -180 - return round(input_angle / degsep) * degsep - 180 + return round(input_angle/degsep)*degsep + +def get_floor_angle(input_angle, degsep=120): + """ + Round to angle in [0, 360) at the floor of n degree separations + """ + if input_angle == 360.: + return 0 + return np.floor(input_angle/degsep)*degsep + def sample_torsion(data, nchis): assert data.shape[1] == nchis*2 @@ -32,12 +48,6 @@ def sample_torsion(data, nchis): sigs = data[:, -nchis:] return np.random.normal(vals, sigs) -def wrap_angles(rot_angle): - more_mask = rot_angle > 180. - rot_angle[more_mask] -= 360. - less_mask = rot_angle < -180. - rot_angle[less_mask] += 360. - return rot_angle class DunbrakRotamerLibrary: """ @@ -140,9 +150,9 @@ def retrieve_torsion_and_prob(self, residue_type, phi, psi, ptmlib): nchis = ptm_info[1] if ptm_info[-1] == 0: - ptm_data = ptmlib.retrieve_torsion_and_prob(residue_type, 360.) + ptm_data = ptmlib.retrieve_torsion_and_prob(residue_type, -360.) ptm_probs = np.array(ptm_data)[:, 0] - torsions = wrap_angles(sample_torsion(np.array(ptm_data)[:, 1:], nchis)-180.) + torsions = wrap_angles(sample_torsion(np.array(ptm_data)[:, 1:], nchis)) assert torsions.shape[1] == ptm_info[0] return [torsions, ptm_probs] @@ -152,7 +162,7 @@ def retrieve_torsion_and_prob(self, residue_type, phi, psi, ptmlib): for i in range(chis.shape[0]): ptm_data = ptmlib.retrieve_torsion_and_prob(residue_type, dchis[i]) ptm_probs = np.array(ptm_data)[:, 0] - torsions = wrap_angles(sample_torsion(np.array(ptm_data)[:, 1:], nchis)-180.) + torsions = wrap_angles(sample_torsion(np.array(ptm_data)[:, 1:], nchis)) for j in range(len(ptm_probs)): p = probs[i]*ptm_probs[j] if p > self.prob_threshold: @@ -203,33 +213,35 @@ def __init__(self, probability_threshold=0.001): items = line.split() if self._info[restype][-1] > 0: # with dependence - chid = float(items[1]) - 180. + chid = float(items[1]) label = (restype, chid) if label in self._data: self._data[label].append([float(n) for n in items[3:]]) else: self._data[label] = [[float(n) for n in items[3:]]] else: - chid = 360. + chid = -360. label = (restype, chid) if label in self._data: self._data[label].append([float(n) for n in items[1:]]) else: self._data[label] = [[float(n) for n in items[1:]]] - + #if restype == 'SEP': print(label, self._data[label]) + def get_dependence(self, residue_type): return self._info[residue_type] - def retrieve_torsion_and_prob(self, residue_type, dchi=360.): + def retrieve_torsion_and_prob(self, residue_type, dchi=-360.): if self._info[residue_type][-1] == 0: - return self._data[(residue_type, 360.)] + return self._data[(residue_type, -360.)] else: - rchi = get_closest_angle(dchi, 120.) - if rchi == 180: rchi -= 120. + # convert to [0, 360) scale + if dchi < 0: dchi += 360. + rchi = get_floor_angle(dchi, 120.) return self._data[(residue_type, rchi)] if __name__ == "__main__": library = DunbrakRotamerLibrary() ptmlib = ptmRotamerLib() - print(library.retrieve_torsion_and_prob("HYP", 73, -54.8, ptmlib)) + print(library.retrieve_torsion_and_prob("S1P", 73, -54.8, ptmlib)) diff --git a/src/mcsce/log.csv b/src/mcsce/log.csv new file mode 100644 index 0000000..c86237c --- /dev/null +++ b/src/mcsce/log.csv @@ -0,0 +1,2 @@ +PDB name,Succeeded,Time used +bb_template.pdb,5,0:00:07.267240 From 6c68b2c1379d09b43f12347100995f336cc15f96 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Wed, 5 Oct 2022 15:33:42 -0700 Subject: [PATCH 11/22] correct angle bin --- src/mcsce/log.csv | 2 -- 1 file changed, 2 deletions(-) delete mode 100644 src/mcsce/log.csv diff --git a/src/mcsce/log.csv b/src/mcsce/log.csv deleted file mode 100644 index c86237c..0000000 --- a/src/mcsce/log.csv +++ /dev/null @@ -1,2 +0,0 @@ -PDB name,Succeeded,Time used -bb_template.pdb,5,0:00:07.267240 From 57214ab75aa13db898d0ad51df4abb8207ec3026 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Fri, 21 Oct 2022 23:03:59 -0700 Subject: [PATCH 12/22] add options to retain sidechains read in --- src/mcsce/cli.py | 54 ++++++++++++++++++++++++---- src/mcsce/core/side_chain_builder.py | 29 +++++++++------ src/mcsce/libs/libstructure.py | 9 +++-- 3 files changed, 72 insertions(+), 20 deletions(-) diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index a467fdd..435c356 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -2,7 +2,7 @@ import os from datetime import datetime -from numpy import mod +from numpy import mod, any, array from tqdm.std import tqdm from mcsce.libs.libstructure import Structure @@ -17,6 +17,7 @@ parser.add_argument("-s", "--same_structure", action="store_true", default=False, help="When generating PDB files in a folder, whether each structure in the folder has the same amino acid sequence. When this is set to True, the energy function preparation will be done only once.") parser.add_argument("-b", "--batch_size", default=16, type=int, help="The batch size used for calculating energies for conformations. Consider decreasing batch size when encountered OOM in building.") parser.add_argument("-m", "--mode", choices=["ensemble", "simple", "exhaustive"], default="ensemble", help="This option controls whether a structural ensemble or just a single structure is created for every input structure. The default behavior is to create a structural ensemble. Simple/exhaustive modes are for creating single structure. In simple mode, n_conf structures are tried sequentially and the first valid structure is returned, and in exhaustive mode, a total number of n_conf structures will be created and the lowest energy conformation is returned.") +parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains") parser.add_argument("-l", "--logfile", default="log.csv", help="File name of the log file") @@ -38,7 +39,7 @@ def maincli(): def read_structure_and_check(file_name): """ Helper function for reading a structure from a given filename and returns the structure object - Also checks whether there are missing atoms in the structure + checks whether there are missing atoms in the structure and """ s = Structure(file_name) s.build() @@ -48,13 +49,15 @@ def read_structure_and_check(file_name): for resid, atom_name in missing_backbone_atoms: message += f"\n{resid} {atom_name}" print(message + "\n") + #if sc_mask is not None: + # s.coords = s.coords[sc_mask] return s -def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_size=4, same_structure=False): +def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batch_size=4, same_structure=False): # antipattern to save time from mcsce.core.side_chain_builder import initialize_func_calc, create_side_chain_ensemble, create_side_chain @@ -70,6 +73,16 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz ff = forcefields["Amberff14SB"] ff_obj = ff(Cterminal='OXT', Nterminal='HN') + if isinstance(fix, str): + if fix.find('-') + fix.find('+') <= -2 : + print('--fix argument syntax error. Abort') + return + elif not same_structure: + print('--fix argument ignored') + if not isinstance(fix, str) and fix is not None: + print('--fix argument syntax error. Abort') + return + if n_worker is None: import multiprocessing n_worker = multiprocessing.cpu_count() - 1 @@ -85,15 +98,40 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz if not input_folder.endswith("/"): input_folder += "/" all_pdbs = [input_folder + f for f in os.listdir(input_folder) if f[-3:].upper() == "PDB"] - + + fix_idxs = [] + struct_mask = None if same_structure: # Assume all structures in a folder are the same: the energy creation step can be done only once s = Structure(all_pdbs[0]) s.build() - s = s.remove_side_chains() + #print(s.data_array.shape, s._data_array.shape) + if fix is not None: + fix = fix.strip() + if fix.find('-') == -1: + fix_idxs = [int(n) for n in fix.split('+')] + else: + if fix.find('+') == -1: + fix_id_chunks = [fix] + else: + fix_id_chunks = fix.split('+') + for chunk in fix_id_chunks: + fix_range = [int(n) for n in chunk.split('-')] + if len(fix_range) > 2: + print('--fix argument syntax error. Abort') + return + start, stop = fix_range + fix_idxs += list(range(start, stop+1)) + if any(array(fix_idxs) > len(s.residue_types)): + print('--fix residue id out of range') + return + #print('fixed residues:', fix_idxs) + #TODO input structure contains unnecessary sidechains + #s = s.remove_side_chains(retain_idxs=fix_idxs) + initialize_func_calc(partial(prepare_energy_function, batch_size=batch_size, - forcefield=ff_obj, terms=["lj", "clash", "coulomb"]), - structure=s) + forcefield=ff_obj, terms=["lj", "clash", "coulomb"]), + structure=s, retain_idxs=fix_idxs) if mode == "simple" and same_structure and n_worker > 1: # parallel executing sequential trials on the same structure (different conformations) t0 = datetime.now() @@ -101,6 +139,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz side_chain_parallel_creator = partial(create_side_chain, n_trials=n_conf, temperature=300, + retain_resi=fix_idxs, parallel_worker=1, return_first_valid=True) import multiprocessing @@ -146,6 +185,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, batch_siz s, n_conf, temperature=300, + retain_resi=fix_idxs, save_path=output_dir, parallel_worker=n_worker, ) diff --git a/src/mcsce/core/side_chain_builder.py b/src/mcsce/core/side_chain_builder.py index 1b1e725..ef852fe 100644 --- a/src/mcsce/core/side_chain_builder.py +++ b/src/mcsce/core/side_chain_builder.py @@ -39,7 +39,7 @@ def choose_random(array): return idx return len(array) - 1 -def initialize_func_calc(efunc_creator, aa_seq=None, structure=None): +def initialize_func_calc(efunc_creator, aa_seq=None, structure=None, retain_idxs=[]): """ Helper function for initializing the energy function calculators according to the specified amino acid sequence or a structure object. @@ -55,6 +55,8 @@ def initialize_func_calc(efunc_creator, aa_seq=None, structure=None): structure: Structure object The structure with backbone atoms only + retain_idxs: list + List of residue ids not to pack sidechains """ # declare global variabales global sidechain_placeholders @@ -77,10 +79,12 @@ def initialize_func_calc(efunc_creator, aa_seq=None, structure=None): structure.res_nums, structure.res_labels)) for idx, resname in tqdm(enumerate(aa_seq), total=len(aa_seq)): - template = sidechain_templates[resname] - structure.add_side_chain(idx + 1, template) + if idx + 1 not in retain_idxs: + template = sidechain_templates[resname] + structure.add_side_chain(idx + 1, template) sidechain_placeholders.append(deepcopy(structure)) - if resname not in ["GLY", "ALA"]: + + if resname not in ["GLY", "ALA"] and idx + 1 not in retain_idxs: n_sidechain_atoms = len(template[1]) all_indices = np.arange(len(structure.atom_labels)) energy_func = efunc_creator(structure.atom_labels, @@ -123,9 +127,10 @@ def create_side_chain_structure(inputs): energy: The energy for the generated conformation """ - backbone_coords, beta, save_addr = inputs + backbone_coords, beta, retain_idxs, save_addr = inputs assert len(sidechain_placeholders) > 0, "Energy functions have not yet initialized!" structure = deepcopy(sidechain_placeholders[0]) + assert structure.coords.shape[0] == backbone_coords.shape[0], "Input structures contain extra sidechain atoms or miss sidechain atoms if fix argument is used" structure.coords = backbone_coords N_CA_C_coords = structure.get_sorted_minimal_backbone_coords() all_backbone_dihedrals = calc_torsion_angles(N_CA_C_coords) @@ -135,6 +140,8 @@ def create_side_chain_structure(inputs): accumulated_energy = energy_calculators[0](structure_coords[None], structure_coords[None, :0])[0] # energies of backbone only for idx, resname in enumerate(structure.residue_types): + # if residue is to retain, skip building sidechain + if idx + 1 in retain_idxs: continue # copy coordinates from the previous growing step to the current placeholder previous_coords = structure_coords # structure = deepcopy(sidechain_placeholders[idx]) @@ -188,7 +195,7 @@ def create_side_chain_structure(inputs): structure.write_PDB(save_addr) return structure, True, accumulated_energy, save_addr -def create_side_chain(structure, n_trials, temperature, parallel_worker=16, return_first_valid=False): +def create_side_chain(structure, n_trials, temperature, retain_resi=[], parallel_worker=16, return_first_valid=False): """ Using the MCSCE workflow to add sidechains to a backbone-only PDB structure. The building process will be repeated for n_trial times, but only the lowest energy conformation will be returned @@ -227,7 +234,7 @@ def create_side_chain(structure, n_trials, temperature, parallel_worker=16, retu if return_first_valid: # Sequential execution with maximal n_trial times, but return the first valid structure for _ in range(n_trials): - conf, succeeded, energy, _ = create_side_chain_structure([structure.coords, beta, None]) + conf, succeeded, energy, _ = create_side_chain_structure([structure.coords, beta, retain_resi, None]) if succeeded: return conf return None @@ -236,7 +243,7 @@ def create_side_chain(structure, n_trials, temperature, parallel_worker=16, retu if parallel_worker == 1: # sequential execution for idx in tqdm(range(n_trials)): - conf, succeeded, energy, _ = create_side_chain_structure([structure.coords, beta, None]) + conf, succeeded, energy, _ = create_side_chain_structure([structure.coords, beta, retain_resi, None]) if succeeded: conformations.append(conf) energies.append(energy) @@ -265,7 +272,7 @@ def create_side_chain(structure, n_trials, temperature, parallel_worker=16, retu return conformations[lowest_energy_idx] -def create_side_chain_ensemble(structure, n_conformations, temperature, save_path, parallel_worker=16): +def create_side_chain_ensemble(structure, n_conformations, temperature, save_path, retain_resi=[], parallel_worker=16): """ Create a given number of conformation ensemble for the backbone-only structure of a protein @@ -302,7 +309,7 @@ def create_side_chain_ensemble(structure, n_conformations, temperature, save_pat if parallel_worker == 1: for idx in tqdm(range(n_conformations)): - conf, succeeded, energy, save_dir = create_side_chain_structure([structure.coords, beta, save_path + f"/{idx}.pdb"]) + conf, succeeded, energy, save_dir = create_side_chain_structure([structure.coords, beta, retain_resi, save_path + f"/{idx}.pdb"]) conformations.append(conf) success_indicator.append(succeeded) if succeeded: @@ -319,7 +326,7 @@ def create_side_chain_ensemble(structure, n_conformations, temperature, save_pat with tqdm(total=n_conformations) as pbar: for result in pool.imap_unordered(create_side_chain_structure,\ - [[structure.coords, beta, save_path + f"/{n}.pdb"] for n in range(n_conformations)]): + [[structure.coords, beta, retain_resi, save_path + f"/{n}.pdb"] for n in range(n_conformations)]): conformations.append(result[0]) success_indicator.append(result[1]) if result[1]: diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index 6d3db2b..60b46fc 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -437,16 +437,21 @@ def check_backbone_atom_completeness(self): return missing_atoms - def remove_side_chains(self): + def remove_side_chains(self, retain_idxs=[]): """ Create a copy of the current structure that removed all atoms beyond CB to be regrown by the MCSCE algorithm """ copied_structure = deepcopy(self) retained_atoms_filter = np.array([atom in backbone_atoms for atom in copied_structure.data_array[:, col_name]]) extra_pro_H_filter = (copied_structure.data_array[:, col_name] == 'H') & (copied_structure.data_array[:, col_resName] == 'PRO') + print(np.sum(retained_atoms_filter)) + if len(retain_idxs) > 0: + for resid in retain_idxs: + retained_atoms_filter = retained_atoms_filter | (copied_structure.data_array[:, col_resSeq] == str(resid)) + print(resid, np.sum(retained_atoms_filter)) retained_atoms_filter = retained_atoms_filter & (~extra_pro_H_filter) copied_structure._data_array = copied_structure.data_array[retained_atoms_filter] - return copied_structure + return copied_structure #, None if np.all(retained_atoms_filter) else retained_atoms_filter def add_side_chain(self, res_idx, sidechain_template): template_structure, sc_atoms = sidechain_template From fd403c3eb59cf915f1d0e45d20e00075759fac8f Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sat, 22 Oct 2022 02:58:45 -0700 Subject: [PATCH 13/22] correct fix resid reading --- src/mcsce/cli.py | 18 ++++++++++-------- src/mcsce/core/rotamer_library.py | 2 +- src/mcsce/libs/libstructure.py | 6 +++--- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index 435c356..d600331 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -100,7 +100,6 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc all_pdbs = [input_folder + f for f in os.listdir(input_folder) if f[-3:].upper() == "PDB"] fix_idxs = [] - struct_mask = None if same_structure: # Assume all structures in a folder are the same: the energy creation step can be done only once s = Structure(all_pdbs[0]) @@ -116,16 +115,19 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc else: fix_id_chunks = fix.split('+') for chunk in fix_id_chunks: - fix_range = [int(n) for n in chunk.split('-')] - if len(fix_range) > 2: - print('--fix argument syntax error. Abort') - return - start, stop = fix_range - fix_idxs += list(range(start, stop+1)) + if chunk.find('-') == -1: + fix_idxs += [int(chunk)] + else: + fix_range = [int(n) for n in chunk.split('-')] + if len(fix_range) > 2: + print('--fix argument syntax error. Abort') + return + start, stop = fix_range + fix_idxs += list(range(start, stop+1)) if any(array(fix_idxs) > len(s.residue_types)): print('--fix residue id out of range') return - #print('fixed residues:', fix_idxs) + print('fixed residues:', fix_idxs) #TODO input structure contains unnecessary sidechains #s = s.remove_side_chains(retain_idxs=fix_idxs) diff --git a/src/mcsce/core/rotamer_library.py b/src/mcsce/core/rotamer_library.py index 149b163..4271c35 100644 --- a/src/mcsce/core/rotamer_library.py +++ b/src/mcsce/core/rotamer_library.py @@ -142,7 +142,7 @@ def retrieve_torsion_and_prob(self, residue_type, phi, psi, ptmlib): if residue_type in ['S1P', 'T1P', 'Y1P', 'H1D', 'H1E', 'H2E']: residue_type = ptm_h[residue_type] if residue_type not in ptmlib._info: - #print("ptm residue rotamers not provided, assumes unmodified residue") + print("ptm residue rotamers not provided, assumes rotamers of unmodified residue") return [chis, probs] # search ptm library diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index 60b46fc..f2703cc 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -439,16 +439,16 @@ def check_backbone_atom_completeness(self): def remove_side_chains(self, retain_idxs=[]): """ - Create a copy of the current structure that removed all atoms beyond CB to be regrown by the MCSCE algorithm + Create a copy of the current structure that removed all atoms beyond CB to be regrown by the MCSCE algorithm, except ones defined in resids to be retained. """ copied_structure = deepcopy(self) retained_atoms_filter = np.array([atom in backbone_atoms for atom in copied_structure.data_array[:, col_name]]) extra_pro_H_filter = (copied_structure.data_array[:, col_name] == 'H') & (copied_structure.data_array[:, col_resName] == 'PRO') - print(np.sum(retained_atoms_filter)) + #print(np.sum(retained_atoms_filter)) if len(retain_idxs) > 0: for resid in retain_idxs: retained_atoms_filter = retained_atoms_filter | (copied_structure.data_array[:, col_resSeq] == str(resid)) - print(resid, np.sum(retained_atoms_filter)) + #print(resid, np.sum(retained_atoms_filter)) retained_atoms_filter = retained_atoms_filter & (~extra_pro_H_filter) copied_structure._data_array = copied_structure.data_array[retained_atoms_filter] return copied_structure #, None if np.all(retained_atoms_filter) else retained_atoms_filter From 943364779bf35a50decda881f6cccc9e3c40e190 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Wed, 11 Jan 2023 16:46:53 -0800 Subject: [PATCH 14/22] fixes pdb segid and adds support for exhaustive mode --- .gitignore | 1 + src/mcsce/cli.py | 5 +++-- src/mcsce/core/side_chain_builder.py | 2 +- src/mcsce/libs/libstructure.py | 3 +++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 7ed0cac..d1221fd 100644 --- a/.gitignore +++ b/.gitignore @@ -117,6 +117,7 @@ SimpleOpt1-5/ # in development files src/mcsce/*.pdb src/mcsce/*_mcsce/ +src/mcsce/*.csv src/mcsce/test_func.py src/mcsce/core/data/glycam_06j.xml src/mcsce/core/sidechain_templates/cap_templates/ diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index d600331..621c555 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -17,7 +17,7 @@ parser.add_argument("-s", "--same_structure", action="store_true", default=False, help="When generating PDB files in a folder, whether each structure in the folder has the same amino acid sequence. When this is set to True, the energy function preparation will be done only once.") parser.add_argument("-b", "--batch_size", default=16, type=int, help="The batch size used for calculating energies for conformations. Consider decreasing batch size when encountered OOM in building.") parser.add_argument("-m", "--mode", choices=["ensemble", "simple", "exhaustive"], default="ensemble", help="This option controls whether a structural ensemble or just a single structure is created for every input structure. The default behavior is to create a structural ensemble. Simple/exhaustive modes are for creating single structure. In simple mode, n_conf structures are tried sequentially and the first valid structure is returned, and in exhaustive mode, a total number of n_conf structures will be created and the lowest energy conformation is returned.") -parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains") +parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains.") parser.add_argument("-l", "--logfile", default="log.csv", help="File name of the log file") @@ -195,7 +195,8 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc with open(logfile, "a") as _log: _log.write("%s,%d,%s\n" % (f, sum(success_indicator), str(datetime.now() - t0))) else: - best_structure = create_side_chain(s, n_conf, 300, n_worker, mode=="simple") + best_structure = create_side_chain(s, n_conf, 300, parallel_worker=n_worker, + retain_resi=fix_idxs, return_first_valid=(mode=="simple")) if best_structure is not None: best_structure.write_PDB(output_dir + ".pdb") with open(logfile, "a") as _log: diff --git a/src/mcsce/core/side_chain_builder.py b/src/mcsce/core/side_chain_builder.py index ef852fe..f1da98d 100644 --- a/src/mcsce/core/side_chain_builder.py +++ b/src/mcsce/core/side_chain_builder.py @@ -254,7 +254,7 @@ def create_side_chain(structure, n_trials, temperature, retain_resi=[], parallel with tqdm(total=n_trials) as pbar: for result in pool.imap_unordered( create_side_chain_structure, \ - [[structure.coords, beta, None]] * n_trials): + [[structure.coords, beta, retain_resi, None]] * n_trials): conf, succeeded, energy, _ = result if succeeded: conformations.append(conf) diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index f2703cc..e3c48b0 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -461,6 +461,9 @@ def add_side_chain(self, res_idx, sidechain_template): sidechain_data_arr = template_structure.data_array.copy() sidechain_data_arr[:, cols_coords] = np.round(sc_all_atom_coords, decimals=3).astype(' Date: Thu, 22 Jun 2023 23:08:33 -0700 Subject: [PATCH 15/22] compatible with residue idx from > 1 start --- src/mcsce/cli.py | 6 ++-- src/mcsce/core/build_definitions.py | 4 +-- src/mcsce/core/check_library.py | 49 ++++++++++++++++++++++++++++ src/mcsce/core/rotamer_library.py | 4 ++- src/mcsce/core/side_chain_builder.py | 13 ++++---- src/mcsce/libs/libpdb.py | 2 +- src/mcsce/libs/libstructure.py | 4 ++- 7 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 src/mcsce/core/check_library.py diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index 621c555..dc4b076 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -17,7 +17,7 @@ parser.add_argument("-s", "--same_structure", action="store_true", default=False, help="When generating PDB files in a folder, whether each structure in the folder has the same amino acid sequence. When this is set to True, the energy function preparation will be done only once.") parser.add_argument("-b", "--batch_size", default=16, type=int, help="The batch size used for calculating energies for conformations. Consider decreasing batch size when encountered OOM in building.") parser.add_argument("-m", "--mode", choices=["ensemble", "simple", "exhaustive"], default="ensemble", help="This option controls whether a structural ensemble or just a single structure is created for every input structure. The default behavior is to create a structural ensemble. Simple/exhaustive modes are for creating single structure. In simple mode, n_conf structures are tried sequentially and the first valid structure is returned, and in exhaustive mode, a total number of n_conf structures will be created and the lowest energy conformation is returned.") -parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains.") +parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop (inclusive) with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains.") parser.add_argument("-l", "--logfile", default="log.csv", help="File name of the log file") @@ -124,10 +124,10 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc return start, stop = fix_range fix_idxs += list(range(start, stop+1)) - if any(array(fix_idxs) > len(s.residue_types)): + if any(array(fix_idxs) > s.res_nums[-1]): print('--fix residue id out of range') return - print('fixed residues:', fix_idxs) + #print('fixed residues:', fix_idxs) #TODO input structure contains unnecessary sidechains #s = s.remove_side_chains(retain_idxs=fix_idxs) diff --git a/src/mcsce/core/build_definitions.py b/src/mcsce/core/build_definitions.py index 9cbfc40..dc680eb 100644 --- a/src/mcsce/core/build_definitions.py +++ b/src/mcsce/core/build_definitions.py @@ -3,8 +3,8 @@ Original code in this file from IDP Conformer Generator package (https://github.com/julie-forman-kay-lab/IDPConformerGenerator) -developed by Joao M. C. Teixeira (@joaomcteixeira), and added to the -MSCCE repository in commit 30e417937968f3c6ef09d8c06a22d54792297161. +developed by Joao M. C. Teixeira (@joaomcteixeira) + Modifications herein are of MCSCE authors. """ import xml.etree.ElementTree as ET diff --git a/src/mcsce/core/check_library.py b/src/mcsce/core/check_library.py new file mode 100644 index 0000000..932a723 --- /dev/null +++ b/src/mcsce/core/check_library.py @@ -0,0 +1,49 @@ +""" +This file defines the function that reads the Dunbrack protein side chain rotamer library + +Shapovalov, Maxim V., and Roland L. Dunbrack Jr. "A smoothed backbone-dependent rotamer library for proteins derived from adaptive kernel density estimates and regressions." Structure 19.6 (2011): 844-858. + +Coded by Jie Li +Date created: Jul 28, 2021 +Modified by Oufan Zhang to add ptm rotamers +""" + +from pathlib import Path +import numpy as np +from mcsce.core.definitions import ptm_aa, ptm_h + +_filepath = Path(__file__).resolve().parent # folder +_library_path = _filepath.joinpath('data', 'SimpleOpt1-5', 'ALL.bbdep.rotamers.lib') +_ptm_library_path = _filepath.joinpath('data', 'ptm_rotamer.lib') + + +def get_closest_angle(input_angle): + if 0 < input_angle < 120: + return 1 + elif -120 < input_angle < 0: + return 2 + else: + return 3 + +def calc_total_prob(residue): + data = {1: {"counts": [], "probs": []}, + 2: {"counts": [], "probs": []}, + 3: {"counts": [], "probs": []}, + } + with open(_library_path) as f: + for line in f: + line = line.strip() + if line.startswith(residue): + restype, phi, psi, count, _, _, _, _, prob, chi1, chi2, chi3, chi4, _, _, _, _= line.split() + assert restype == residue + chi1_range = get_closest_angle(float(chi1)) + data[chi1_range]["counts"].append(float(count)) + data[chi1_range]["probs"].append(float(prob)) + + for n in range(1, 4): + print(np.sum(np.array(data[n]["counts"]) * np.array(data[n]["probs"]) / np.sum(data[n]["counts"]))) + +if __name__ == "__main__": + for resn in ["SER", "THR", "TYR"]: + print(resn) + calc_total_prob(resn) diff --git a/src/mcsce/core/rotamer_library.py b/src/mcsce/core/rotamer_library.py index 4271c35..935c04c 100644 --- a/src/mcsce/core/rotamer_library.py +++ b/src/mcsce/core/rotamer_library.py @@ -244,4 +244,6 @@ def retrieve_torsion_and_prob(self, residue_type, dchi=-360.): if __name__ == "__main__": library = DunbrakRotamerLibrary() ptmlib = ptmRotamerLib() - print(library.retrieve_torsion_and_prob("S1P", 73, -54.8, ptmlib)) + for resn in ["SER", "THR", "TYR"]: + library.retrieve_torsion_and_prob(resn, 73, -54.8, ptmlib) + #print(library.retrieve_torsion_and_prob("S1P", 73, -54.8, ptmlib)) diff --git a/src/mcsce/core/side_chain_builder.py b/src/mcsce/core/side_chain_builder.py index f1da98d..e7ffc92 100644 --- a/src/mcsce/core/side_chain_builder.py +++ b/src/mcsce/core/side_chain_builder.py @@ -74,17 +74,18 @@ def initialize_func_calc(efunc_creator, aa_seq=None, structure=None, retain_idxs # extract amino acid sequence from structure object aa_seq = structure.residue_types structure = deepcopy(structure) + sidechain_placeholders.append(deepcopy(structure)) energy_calculators.append(efunc_creator(structure.atom_labels, structure.res_nums, structure.res_labels)) for idx, resname in tqdm(enumerate(aa_seq), total=len(aa_seq)): - if idx + 1 not in retain_idxs: + if idx + structure.res_nums[0] not in retain_idxs: template = sidechain_templates[resname] - structure.add_side_chain(idx + 1, template) + structure.add_side_chain(idx + structure.res_nums[0], template) sidechain_placeholders.append(deepcopy(structure)) - if resname not in ["GLY", "ALA"] and idx + 1 not in retain_idxs: + if resname not in ["GLY", "ALA"] and idx + structure.res_nums[0] not in retain_idxs: n_sidechain_atoms = len(template[1]) all_indices = np.arange(len(structure.atom_labels)) energy_func = efunc_creator(structure.atom_labels, @@ -138,10 +139,10 @@ def create_side_chain_structure(inputs): all_psi = np.concatenate([all_backbone_dihedrals[::3], [np.nan]]) * 180 / np.pi structure_coords = backbone_coords accumulated_energy = energy_calculators[0](structure_coords[None], structure_coords[None, :0])[0] # energies of backbone only - + for idx, resname in enumerate(structure.residue_types): # if residue is to retain, skip building sidechain - if idx + 1 in retain_idxs: continue + if idx + structure.res_nums[0] in retain_idxs: continue # copy coordinates from the previous growing step to the current placeholder previous_coords = structure_coords # structure = deepcopy(sidechain_placeholders[idx]) @@ -173,7 +174,7 @@ def create_side_chain_structure(inputs): energies = energy_func(all_coords[:, -n_sidechain_atoms:], all_coords[:, : -n_sidechain_atoms]) minimum_energy = min(energies) # Keep track of the minimum energy so that the renormalized energies can be converted back - #print(idx, resname, len(candidate_probs), np.isinf(energies).sum()) + #print(idx+structure.res_nums[0], resname, len(candidate_probs), np.isinf(energies).sum()) # If all energies are inf, end this growth if np.isinf(energies).all(): return None, False, None, None diff --git a/src/mcsce/libs/libpdb.py b/src/mcsce/libs/libpdb.py index a1d3df8..b4bfa18 100644 --- a/src/mcsce/libs/libpdb.py +++ b/src/mcsce/libs/libpdb.py @@ -221,7 +221,7 @@ def is_pdb(datastr): """Detect if `datastr` if a PDB format v3 file.""" assert isinstance(datastr, str), \ f'`datastr` is not str: {type(datastr)} instead' - return bool(datastr.count('\nATOM ') > 0) + return bool(datastr.count('\nATOM ') > 0) or bool(datastr.count('\nHETATM ') > 0) class PDBIDFactory: diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index e3c48b0..e049cd8 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -461,7 +461,9 @@ def add_side_chain(self, res_idx, sidechain_template): sidechain_data_arr = template_structure.data_array.copy() sidechain_data_arr[:, cols_coords] = np.round(sc_all_atom_coords, decimals=3).astype(' Date: Thu, 22 Jun 2023 23:10:20 -0700 Subject: [PATCH 16/22] compatible with residue idx from > 1 start --- src/mcsce/core/check_library.py | 49 --------------------------------- 1 file changed, 49 deletions(-) delete mode 100644 src/mcsce/core/check_library.py diff --git a/src/mcsce/core/check_library.py b/src/mcsce/core/check_library.py deleted file mode 100644 index 932a723..0000000 --- a/src/mcsce/core/check_library.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -This file defines the function that reads the Dunbrack protein side chain rotamer library - -Shapovalov, Maxim V., and Roland L. Dunbrack Jr. "A smoothed backbone-dependent rotamer library for proteins derived from adaptive kernel density estimates and regressions." Structure 19.6 (2011): 844-858. - -Coded by Jie Li -Date created: Jul 28, 2021 -Modified by Oufan Zhang to add ptm rotamers -""" - -from pathlib import Path -import numpy as np -from mcsce.core.definitions import ptm_aa, ptm_h - -_filepath = Path(__file__).resolve().parent # folder -_library_path = _filepath.joinpath('data', 'SimpleOpt1-5', 'ALL.bbdep.rotamers.lib') -_ptm_library_path = _filepath.joinpath('data', 'ptm_rotamer.lib') - - -def get_closest_angle(input_angle): - if 0 < input_angle < 120: - return 1 - elif -120 < input_angle < 0: - return 2 - else: - return 3 - -def calc_total_prob(residue): - data = {1: {"counts": [], "probs": []}, - 2: {"counts": [], "probs": []}, - 3: {"counts": [], "probs": []}, - } - with open(_library_path) as f: - for line in f: - line = line.strip() - if line.startswith(residue): - restype, phi, psi, count, _, _, _, _, prob, chi1, chi2, chi3, chi4, _, _, _, _= line.split() - assert restype == residue - chi1_range = get_closest_angle(float(chi1)) - data[chi1_range]["counts"].append(float(count)) - data[chi1_range]["probs"].append(float(prob)) - - for n in range(1, 4): - print(np.sum(np.array(data[n]["counts"]) * np.array(data[n]["probs"]) / np.sum(data[n]["counts"]))) - -if __name__ == "__main__": - for resn in ["SER", "THR", "TYR"]: - print(resn) - calc_total_prob(resn) From 5519447f5f01e2740152228f2c50e9c8b1f8da1a Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Thu, 22 Jun 2023 23:14:35 -0700 Subject: [PATCH 17/22] compatible with residue idx from > 1 start --- .gitignore | 1 + src/mcsce/cli.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index d1221fd..9b566c8 100644 --- a/.gitignore +++ b/.gitignore @@ -115,6 +115,7 @@ venv.bak/ SimpleOpt1-5/ # in development files +src/mcsce/core/check_library.py src/mcsce/*.pdb src/mcsce/*_mcsce/ src/mcsce/*.csv diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index dc4b076..c05ea9e 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -17,7 +17,7 @@ parser.add_argument("-s", "--same_structure", action="store_true", default=False, help="When generating PDB files in a folder, whether each structure in the folder has the same amino acid sequence. When this is set to True, the energy function preparation will be done only once.") parser.add_argument("-b", "--batch_size", default=16, type=int, help="The batch size used for calculating energies for conformations. Consider decreasing batch size when encountered OOM in building.") parser.add_argument("-m", "--mode", choices=["ensemble", "simple", "exhaustive"], default="ensemble", help="This option controls whether a structural ensemble or just a single structure is created for every input structure. The default behavior is to create a structural ensemble. Simple/exhaustive modes are for creating single structure. In simple mode, n_conf structures are tried sequentially and the first valid structure is returned, and in exhaustive mode, a total number of n_conf structures will be created and the lowest energy conformation is returned.") -parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop (inclusive) with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains.") +parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop (inclusive) with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains for residues to be fixed. Currently assumes continuous residue ids.") parser.add_argument("-l", "--logfile", default="log.csv", help="File name of the log file") From d8b750baf04cb1fb304797150ae347373abe1c2c Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sat, 24 Jun 2023 21:37:41 -0700 Subject: [PATCH 18/22] remove added sidechains only for parts to be processed --- .gitignore | 2 +- src/mcsce/cli.py | 12 +++++------- src/mcsce/libs/libstructure.py | 2 -- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 9b566c8..7c52fa5 100644 --- a/.gitignore +++ b/.gitignore @@ -119,6 +119,6 @@ src/mcsce/core/check_library.py src/mcsce/*.pdb src/mcsce/*_mcsce/ src/mcsce/*.csv -src/mcsce/test_func.py +src/mcsce/test_pdbs/ src/mcsce/core/data/glycam_06j.xml src/mcsce/core/sidechain_templates/cap_templates/ diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index c05ea9e..8402152 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -36,7 +36,7 @@ def maincli(): """Independent client entry point.""" cli(parser, main) -def read_structure_and_check(file_name): +def read_structure_and_check(file_name, retain_idx=[]): """ Helper function for reading a structure from a given filename and returns the structure object checks whether there are missing atoms in the structure and @@ -49,8 +49,7 @@ def read_structure_and_check(file_name): for resid, atom_name in missing_backbone_atoms: message += f"\n{resid} {atom_name}" print(message + "\n") - #if sc_mask is not None: - # s.coords = s.coords[sc_mask] + s = s.remove_side_chains(retain_idx) return s @@ -127,9 +126,8 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc if any(array(fix_idxs) > s.res_nums[-1]): print('--fix residue id out of range') return - #print('fixed residues:', fix_idxs) - #TODO input structure contains unnecessary sidechains - #s = s.remove_side_chains(retain_idxs=fix_idxs) + # remove added sidechains in sections to be processed + s = s.remove_side_chains(fix_idxs) initialize_func_calc(partial(prepare_energy_function, batch_size=batch_size, forcefield=ff_obj, terms=["lj", "clash", "coulomb"]), @@ -137,7 +135,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc if mode == "simple" and same_structure and n_worker > 1: # parallel executing sequential trials on the same structure (different conformations) t0 = datetime.now() - structures = [read_structure_and_check(f) for f in all_pdbs] + structures = [read_structure_and_check(f, fix_idxs) for f in all_pdbs] side_chain_parallel_creator = partial(create_side_chain, n_trials=n_conf, temperature=300, diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index e049cd8..0d65b6d 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -444,11 +444,9 @@ def remove_side_chains(self, retain_idxs=[]): copied_structure = deepcopy(self) retained_atoms_filter = np.array([atom in backbone_atoms for atom in copied_structure.data_array[:, col_name]]) extra_pro_H_filter = (copied_structure.data_array[:, col_name] == 'H') & (copied_structure.data_array[:, col_resName] == 'PRO') - #print(np.sum(retained_atoms_filter)) if len(retain_idxs) > 0: for resid in retain_idxs: retained_atoms_filter = retained_atoms_filter | (copied_structure.data_array[:, col_resSeq] == str(resid)) - #print(resid, np.sum(retained_atoms_filter)) retained_atoms_filter = retained_atoms_filter & (~extra_pro_H_filter) copied_structure._data_array = copied_structure.data_array[retained_atoms_filter] return copied_structure #, None if np.all(retained_atoms_filter) else retained_atoms_filter From 3439772f84faf50f32a19066e191bc00b2f17a5a Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sat, 24 Jun 2023 21:42:20 -0700 Subject: [PATCH 19/22] remove added sidechains only for parts to be processed --- src/mcsce/libs/libenergy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcsce/libs/libenergy.py b/src/mcsce/libs/libenergy.py index 2524d40..bc5cd24 100644 --- a/src/mcsce/libs/libenergy.py +++ b/src/mcsce/libs/libenergy.py @@ -147,7 +147,7 @@ def prepare_energy_function( if clash_term: vdw_radii_sum = calc_vdw_radii_sum(atom_labels[new_indices], atom_labels[old_indices]) - vdw_radii_sum *= 0.6 # The clash check parameter as defined in the SI of the MCSCE paper + vdw_radii_sum *= 0.7 # The clash check parameter as defined in the SI of the MCSCE paper vdw_radii_sum[bonds_1_mask] = 0 vdw_radii_sum = vdw_radii_sum[None] else: From fbb619b23ed1fcecada6b4e2d4fa083cad32199b Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Sat, 24 Jun 2023 21:52:51 -0700 Subject: [PATCH 20/22] remove added sidechains only for parts to be processed; increase vdw restraint --- src/mcsce/libs/libenergy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mcsce/libs/libenergy.py b/src/mcsce/libs/libenergy.py index bc5cd24..c85abd1 100644 --- a/src/mcsce/libs/libenergy.py +++ b/src/mcsce/libs/libenergy.py @@ -147,7 +147,7 @@ def prepare_energy_function( if clash_term: vdw_radii_sum = calc_vdw_radii_sum(atom_labels[new_indices], atom_labels[old_indices]) - vdw_radii_sum *= 0.7 # The clash check parameter as defined in the SI of the MCSCE paper + vdw_radii_sum *= 0.65 # The clash check parameter as defined in the SI of the MCSCE paper vdw_radii_sum[bonds_1_mask] = 0 vdw_radii_sum = vdw_radii_sum[None] else: From d0e0ba577f2b9e3c369a2a2e2b52bd42c8e94fa9 Mon Sep 17 00:00:00 2001 From: Oufan75 Date: Mon, 26 Jun 2023 13:37:36 -0700 Subject: [PATCH 21/22] add verbose flag --- src/mcsce/cli.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index 8402152..f9974f4 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -19,6 +19,7 @@ parser.add_argument("-m", "--mode", choices=["ensemble", "simple", "exhaustive"], default="ensemble", help="This option controls whether a structural ensemble or just a single structure is created for every input structure. The default behavior is to create a structural ensemble. Simple/exhaustive modes are for creating single structure. In simple mode, n_conf structures are tried sequentially and the first valid structure is returned, and in exhaustive mode, a total number of n_conf structures will be created and the lowest energy conformation is returned.") parser.add_argument("-f", "--fix", default=None, help="list of residue ids whose side chains are retained. Specify start to stop (inclusive) with dash, and seperate id ranges with plus. E.g. 2-5+10-13. Only supports same structure mode and input structure should contain original side chains for residues to be fixed. Currently assumes continuous residue ids.") parser.add_argument("-l", "--logfile", default="log.csv", help="File name of the log file") +parser.add_argument("-v", "--verbose", default=False, help="whether to print out warnings") def load_args(parser): @@ -36,7 +37,7 @@ def maincli(): """Independent client entry point.""" cli(parser, main) -def read_structure_and_check(file_name, retain_idx=[]): +def read_structure_and_check(file_name, retain_idx=[], verbose=False): """ Helper function for reading a structure from a given filename and returns the structure object checks whether there are missing atoms in the structure and @@ -48,7 +49,10 @@ def read_structure_and_check(file_name, retain_idx=[]): message = f"WARNING! These atoms are missing from the current backbone structure [{file_name}]:" for resid, atom_name in missing_backbone_atoms: message += f"\n{resid} {atom_name}" - print(message + "\n") + if verbose: + print(message + "\n") + else: + print("WARNING! %d missing atoms in total"%(len(message.split("\n")) - 1)) s = s.remove_side_chains(retain_idx) return s @@ -56,7 +60,7 @@ def read_structure_and_check(file_name, retain_idx=[]): -def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batch_size=4, same_structure=False): +def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batch_size=4, same_structure=False, verbose=False): # antipattern to save time from mcsce.core.side_chain_builder import initialize_func_calc, create_side_chain_ensemble, create_side_chain @@ -87,8 +91,9 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc n_worker = multiprocessing.cpu_count() - 1 else: n_worker = n_worker - print("# workers:", n_worker) - print("mode: ", mode) + if verbose: + print("# workers:", n_worker) + print("mode: ", mode) if input_structure[-3:].upper() == "PDB": all_pdbs = [input_structure] @@ -135,7 +140,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc if mode == "simple" and same_structure and n_worker > 1: # parallel executing sequential trials on the same structure (different conformations) t0 = datetime.now() - structures = [read_structure_and_check(f, fix_idxs) for f in all_pdbs] + structures = [read_structure_and_check(f, fix_idxs, verbose) for f in all_pdbs] side_chain_parallel_creator = partial(create_side_chain, n_trials=n_conf, temperature=300, @@ -173,7 +178,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc output_dir = base_dir + "/" + os.path.splitext(os.path.basename(f))[0] + "_mcsce" if not os.path.exists(output_dir) and mode == "ensemble": os.makedirs(output_dir) - s = read_structure_and_check(f) + s = read_structure_and_check(f, fix_idxs, verbose=verbose) if not same_structure: From 02b107fac251a7f1bfb367d5e3873cbae06b9835 Mon Sep 17 00:00:00 2001 From: Jerry Date: Tue, 25 Jul 2023 15:49:07 -0700 Subject: [PATCH 22/22] bug fixes to allow code to run --- src/mcsce/libs/libenergy.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/mcsce/libs/libenergy.py b/src/mcsce/libs/libenergy.py index 0999ec5..de01978 100644 --- a/src/mcsce/libs/libenergy.py +++ b/src/mcsce/libs/libenergy.py @@ -149,7 +149,7 @@ def prepare_energy_function( if clash_term: vdw_radii_sum = calc_vdw_radii_sum(atom_labels[new_indices], atom_labels[old_indices]) - vdw_radii_sum *= 0.65 # The clash check parameter as defined in the SI of the MCSCE paper + vdw_radii_sum *= 0.6 # The clash check parameter as defined in the SI of the MCSCE paper vdw_radii_sum[bonds_1_mask] = 0 vdw_radii_sum = vdw_radii_sum[None] else: @@ -190,6 +190,8 @@ def prepare_energy_function( new_indices, old_indices, forcefield.forcefield, + N_terminal_indicators, + C_terminal_indicators, ) if coulomb_term: @@ -489,14 +491,16 @@ def create_Coulomb_params_raw( new_indices, old_indices, force_field, + n_terminal_indicators, + c_terminal_indicators, ): """Borrowed from IDP Conformer Generator package (https://github.com/julie-forman-kay-lab/IDPConformerGenerator) developed by Joao M. C. Teixeira""" charges_i_new = extract_ff_params_for_seq( atom_labels[new_indices], residue_numbers[new_indices], residue_labels[new_indices], - min(residue_numbers), - max(residue_numbers), + n_terminal_indicators[new_indices], + c_terminal_indicators[new_indices], force_field, 'charge', ) @@ -505,8 +509,8 @@ def create_Coulomb_params_raw( atom_labels[old_indices], residue_numbers[old_indices], residue_labels[old_indices], - min(residue_numbers), - max(residue_numbers), + n_terminal_indicators[old_indices], + c_terminal_indicators[old_indices], force_field, 'charge', )