diff --git a/src/mcsce/cli.py b/src/mcsce/cli.py index f9974f4..1adaca4 100644 --- a/src/mcsce/cli.py +++ b/src/mcsce/cli.py @@ -44,6 +44,18 @@ def read_structure_and_check(file_name, retain_idx=[], verbose=False): """ s = Structure(file_name) s.build() + his_id = [str(resid) for resid, res_type in zip(s.residues, s.residue_types) if res_type == "HIS"] + if len(his_id) > 0: + message = f"WARNING! Undefined histidine protonation status found for structure [{file_name}]:\n" + message += " ".join(his_id) + message += "\nThese residues have been modified to HIP\n" + res_labels = s.res_labels + res_labels[res_labels == "HIS"] = "HIP" + s.res_labels = res_labels + if verbose: + print(message) + else: + print("HIS residues modified to HIP\n") missing_backbone_atoms = s.check_backbone_atom_completeness() if len(missing_backbone_atoms) > 0: message = f"WARNING! These atoms are missing from the current backbone structure [{file_name}]:" @@ -133,7 +145,7 @@ def main(input_structure, n_conf, n_worker, output_dir, logfile, mode, fix, batc return # remove added sidechains in sections to be processed s = s.remove_side_chains(fix_idxs) - + initialize_func_calc(partial(prepare_energy_function, batch_size=batch_size, forcefield=ff_obj, terms=["lj", "clash", "coulomb"]), structure=s, retain_idxs=fix_idxs) diff --git a/src/mcsce/core/side_chain_builder.py b/src/mcsce/core/side_chain_builder.py index e7ffc92..a7755ff 100644 --- a/src/mcsce/core/side_chain_builder.py +++ b/src/mcsce/core/side_chain_builder.py @@ -74,23 +74,31 @@ def initialize_func_calc(efunc_creator, aa_seq=None, structure=None, retain_idxs # extract amino acid sequence from structure object aa_seq = structure.residue_types structure = deepcopy(structure) + chain_ids = structure.residue_chains sidechain_placeholders.append(deepcopy(structure)) + n_terms, c_terms = structure.get_terminal_res_atom_arr() energy_calculators.append(efunc_creator(structure.atom_labels, structure.res_nums, - structure.res_labels)) - for idx, resname in tqdm(enumerate(aa_seq), total=len(aa_seq)): + structure.res_labels, + n_terms, + c_terms)) + + for idx, resname, chain_id in tqdm(zip(range(len(aa_seq)), aa_seq, chain_ids), total=len(aa_seq)): if idx + structure.res_nums[0] not in retain_idxs: template = sidechain_templates[resname] - structure.add_side_chain(idx + structure.res_nums[0], template) + structure.add_side_chain(idx + structure.res_nums[0], template, chain_id) sidechain_placeholders.append(deepcopy(structure)) if resname not in ["GLY", "ALA"] and idx + structure.res_nums[0] not in retain_idxs: n_sidechain_atoms = len(template[1]) all_indices = np.arange(len(structure.atom_labels)) + n_terms, c_terms = structure.get_terminal_res_atom_arr() energy_func = efunc_creator(structure.atom_labels, structure.res_nums, structure.res_labels, + n_terms, + c_terms, partial_indices=[all_indices[-n_sidechain_atoms:], all_indices[:-n_sidechain_atoms]]) energy_calculators.append(energy_func) diff --git a/src/mcsce/libs/libenergy.py b/src/mcsce/libs/libenergy.py index c85abd1..0999ec5 100644 --- a/src/mcsce/libs/libenergy.py +++ b/src/mcsce/libs/libenergy.py @@ -28,6 +28,8 @@ def prepare_energy_function( atom_labels, residue_numbers, residue_labels, + N_terminal_indicators, + C_terminal_indicators, forcefield, batch_size=16, partial_indices=None, @@ -162,6 +164,8 @@ def prepare_energy_function( new_indices, old_indices, forcefield.forcefield, + N_terminal_indicators, + C_terminal_indicators, ) # 0.2 as 0.4 @@ -387,7 +391,7 @@ def create_bonds_apart_mask_for_ij_pairs_old( residue_labels_ij_gen, residue_numbers_ij_gen, atom_labels_ij_gen, - bonds_intra, + bonds_atom_labelsintra, bonds_inter, ) @@ -407,15 +411,18 @@ def create_LJ_params_raw( new_indices, old_indices, force_field, + n_terminal_indicators, + c_terminal_indicators, ): """Create ACOEFF and BCOEFF parameters. Borrowed from IDP Conformer Generator package (https://github.com/julie-forman-kay-lab/IDPConformerGenerator) developed by Joao M. C. Teixeira""" + sigmas_ii_new = extract_ff_params_for_seq( atom_labels[new_indices], residue_numbers[new_indices], residue_labels[new_indices], - min(residue_numbers), - max(residue_numbers), + n_terminal_indicators[new_indices], + c_terminal_indicators[new_indices], force_field, 'sigma', ) @@ -424,8 +431,8 @@ def create_LJ_params_raw( atom_labels[old_indices], residue_numbers[old_indices], residue_labels[old_indices], - min(residue_numbers), - max(residue_numbers), + n_terminal_indicators[old_indices], + c_terminal_indicators[old_indices], force_field, 'sigma', ) @@ -434,8 +441,8 @@ def create_LJ_params_raw( atom_labels[new_indices], residue_numbers[new_indices], residue_labels[new_indices], - min(residue_numbers), - max(residue_numbers), + n_terminal_indicators[new_indices], + c_terminal_indicators[new_indices], force_field, 'epsilon', ) @@ -444,8 +451,8 @@ def create_LJ_params_raw( atom_labels[old_indices], residue_numbers[old_indices], residue_labels[old_indices], - min(residue_numbers), - max(residue_numbers), + n_terminal_indicators[old_indices], + c_terminal_indicators[old_indices], force_field, 'epsilon', ) @@ -463,6 +470,7 @@ def create_LJ_params_raw( # mixing rules epsilons_ij = epsilons_ij_pre ** 0.5 + # mixing + nm to Angstrom converstion # / 2 and * 10 sigmas_ij = sigmas_ij_pre * 5 diff --git a/src/mcsce/libs/libparse.py b/src/mcsce/libs/libparse.py index 9414baf..13cbfb2 100644 --- a/src/mcsce/libs/libparse.py +++ b/src/mcsce/libs/libparse.py @@ -415,15 +415,16 @@ def extract_ff_params_for_seq( params_l = [] params_append = params_l.append - zipit = zip(atom_labels, residue_numbers, residue_labels) - for atom_name, res_num, res_label in zipit: - #print(atom_name, res_num, res_label) + zipit = zip(atom_labels, residue_numbers, residue_labels, n_terminal_idx, c_terminal_idx) + for atom_name, res_num, res_label, is_nterm, is_cterm in zipit: + # adds N and C to the terminal residues - if res_num == n_terminal_idx: + + if is_nterm: res = 'N' + res_label assert res.isupper() and len(res) == 4, res - elif res_num == c_terminal_idx: + elif is_cterm: res = 'C' + res_label assert res.isupper() and len(res) == 4, res else: diff --git a/src/mcsce/libs/libstructure.py b/src/mcsce/libs/libstructure.py index 0d65b6d..44f53f3 100644 --- a/src/mcsce/libs/libstructure.py +++ b/src/mcsce/libs/libstructure.py @@ -158,6 +158,29 @@ def coords(self): def coords(self, coords): self.data_array[:, cols_coords] = \ np.round(coords, decimals=3).astype('