
Commit

Merge pull request #465 from carlocamilloni/mglob_prior
Mglob prior
carlocamilloni authored Sep 5, 2024
2 parents 32a7809 + 5e31096 commit ae2b31a
Showing 5 changed files with 399 additions and 83 deletions.
34 changes: 32 additions & 2 deletions multiego.py
@@ -2,6 +2,7 @@
import sys
import os
import numpy as np
import pandas as pd

from src.multiego import ensemble
from src.multiego import io
@@ -406,9 +407,38 @@ def get_meGO_LJ(meGO_ensemble, args):
"""
pairs14, exclusion_bonds14 = ensemble.generate_14_data(meGO_ensemble)
if args.egos == "rc":
meGO_LJ = ensemble.generate_basic_LJ(meGO_ensemble, args)
meGO_LJ_14 = pairs14
meGO_LJ, meGO_LJ_14 = ensemble.generate_basic_LJ(meGO_ensemble, args)
# meGO_LJ_14 = pairs14
meGO_LJ_14 = pd.concat([meGO_LJ_14, pairs14])
needed_fields = [
"ai",
"aj",
"type",
"c6",
"c12",
"sigma",
"epsilon",
"probability",
"rc_probability",
"md_threshold",
"rc_threshold",
"rep",
"cutoff",
"molecule_name_ai",
"molecule_name_aj",
"same_chain",
"source",
"number_ai",
"number_aj",
]
meGO_LJ_14 = meGO_LJ_14[needed_fields]
meGO_LJ_14["epsilon"] = -meGO_LJ_14["c12"]
meGO_LJ_14.reset_index(inplace=True)
# Sorting the pairs prioritising intermolecular interactions
meGO_LJ_14.sort_values(by=["ai", "aj", "c12"], ascending=[True, True, True], inplace=True)
# Cleaning the duplicates
meGO_LJ_14 = meGO_LJ_14.drop_duplicates(subset=["ai", "aj"], keep="first")
print(meGO_LJ_14)
else:
train_dataset, check_dataset = ensemble.init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14, args)
meGO_LJ, meGO_LJ_14 = ensemble.generate_LJ(meGO_ensemble, train_dataset, check_dataset, args)
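
In rc mode, the 1-4 pairs from the basic prior returned by generate_basic_LJ are now concatenated with the topology-derived pairs14, restricted to the needed fields, and deduplicated so that each (ai, aj) pair keeps the row with the smallest c12. A minimal sketch of that pandas pattern, with made-up identifiers and values in place of the real atom names and force-field parameters:

import pandas as pd

# Two sources provide the same (ai, aj) pair with different c12 values (illustrative numbers)
basic_pairs = pd.DataFrame({"ai": ["A_1", "A_1"], "aj": ["A_2", "A_3"], "c12": [2.0e-6, 5.0e-7]})
topology_pairs = pd.DataFrame({"ai": ["A_1"], "aj": ["A_2"], "c12": [1.0e-6]})

merged = pd.concat([basic_pairs, topology_pairs])
# Sort by c12 ascending so that drop_duplicates keeps the smallest c12 for each (ai, aj)
merged.sort_values(by=["ai", "aj", "c12"], ascending=[True, True, True], inplace=True)
merged = merged.drop_duplicates(subset=["ai", "aj"], keep="first")
print(merged)  # (A_1, A_2) keeps c12 = 1.0e-6; (A_1, A_3) keeps c12 = 5.0e-7
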
132 changes: 107 additions & 25 deletions src/multiego/ensemble.py
@@ -493,7 +493,7 @@ def init_meGO_ensemble(args):
return ensemble

reference_set = set(ensemble["topology_dataframe"]["name"].to_list())
unique_ref_molecule_names = topology_dataframe["molecule_name"].unique()
# unique_ref_molecule_names = topology_dataframe["molecule_name"].unique()

# now we process the train contact matrices
train_contact_matrices = {}
@@ -521,7 +521,7 @@ def init_meGO_ensemble(args):
_,
) = initialize_topology(topology, custom_dict, args)
# check that the molecules defined have a reference
unique_temp_molecule_names = temp_topology_dataframe["molecule_name"].unique()
# unique_temp_molecule_names = temp_topology_dataframe["molecule_name"].unique()
# check_molecule_names(unique_ref_molecule_names, unique_temp_molecule_names)

train_topology_dataframe = pd.concat(
@@ -596,7 +596,7 @@ def init_meGO_ensemble(args):
_,
) = initialize_topology(topology, custom_dict, args)
# check that the molecules defined have a reference
unique_temp_molecule_names = temp_topology_dataframe["molecule_name"].unique()
# unique_temp_molecule_names = temp_topology_dataframe["molecule_name"].unique()
# check_molecule_names(unique_ref_molecule_names, unique_temp_molecule_names)
check_topology_dataframe = pd.concat(
[check_topology_dataframe, temp_topology_dataframe],
@@ -762,6 +762,7 @@ def generate_14_data(meGO_ensemble):
pairs["aj"] = pairs["aj"].map(type_atnum_dict)
pairs["rep"] = pairs["c12"]
pairs["same_chain"] = True
pairs["1-4"] = "1_4"
else:
pairs["ai"] = meGO_ensemble["user_pairs"][molecule].ai.astype(str)
pairs["aj"] = meGO_ensemble["user_pairs"][molecule].aj.astype(str)
@@ -979,6 +980,10 @@ def init_LJ_datasets(meGO_ensemble, pairs14, exclusion_bonds14, args):
return train_dataset, check_dataset


def get_residue_number(s):
return int(s.split("_")[-1])


def generate_basic_LJ(meGO_ensemble, args):
"""
Generates basic LJ (Lennard-Jones) interactions DataFrame within a molecular ensemble.
@@ -1021,6 +1026,7 @@
"cutoff",
"rep",
"att",
"bare",
]

basic_LJ = pd.DataFrame()
@@ -1059,15 +1065,37 @@
c6_list = ai_name.map(name_to_c6).to_numpy()
ai_name = ai_name.to_numpy(dtype=str)
oxygen_mask = masking.create_array_mask(ai_name, ai_name, [("O", "OM"), ("O", "O"), ("OM", "OM")], symmetrize=True)
bb_mask = masking.create_array_mask(
ca_mask = masking.create_array_mask(
ai_name,
ai_name,
[
("CH1", "CH1"),
("CH1", "C"),
("CH1", "N"),
("CH1", "S"),
("CH1", "CH"),
("CH1", "CH2"),
("CH1", "CH3"),
("CH1", "CH2r"),
("CH1", "CH"),
("CH1", "CH3"),
("CH1", "OA"),
("NL", "CH1"),
("NZ", "CH1"),
("NE", "CH1"),
("NT", "CH1"),
("NR", "CH1"),
("N", "N"),
("N", "NT"),
("NT", "NT"),
("C", "C"),
("S", "S"),
],
symmetrize=True,
)
bb_mask = masking.create_array_mask(
ai_name,
ai_name,
[
("CH1", "CH1"),
("CH1", "C"),
("CH1", "N"),
("C", "C"),
@@ -1138,36 +1166,74 @@
],
symmetrize=True,
)
hydrophobic_w_mask = masking.create_array_mask(
ai_name,
ai_name,
[
("CH1", "CH2"),
("CH1", "CH3"),
("CH1", "CH2r"),
("CH1", "CH"),
],
symmetrize=True,
)
basic_LJ["type"] = 1
basic_LJ["source"] = "basic"
basic_LJ["same_chain"] = True
basic_LJ.c6 = 0.0
basic_LJ.c12 = np.sqrt(bare_c12_list * bare_c12_list[:, np.newaxis]).flatten()
basic_LJ.bare = np.sqrt(bare_c12_list * bare_c12_list[:, np.newaxis]).flatten()
basic_LJ.rep = np.sqrt(c12_list * c12_list[:, np.newaxis]).flatten()
basic_LJ.att = np.sqrt(c6_list * c6_list[:, np.newaxis]).flatten()
oxygen_LJ = basic_LJ[oxygen_mask].copy()
oxygen_LJ["c12"] *= 11.4
oxygen_LJ["c6"] = 0.0
hydrophobic_LJ = basic_LJ[hydrophobic_mask].copy()
hydrophobic_LJ["c12"] = 0.20 * hydrophobic_LJ["rep"]
hydrophobic_LJ["c6"] = 0.20 * hydrophobic_LJ["att"]
bb_LJ = basic_LJ[bb_mask].copy()
bb_LJ["c12"] = 0.25 * bb_LJ["rep"]
bb_LJ["c6"] = 0.25 * bb_LJ["att"]
hbond_LJ = basic_LJ[hbond_mask].copy()
hbond_LJ["c12"] = 0.25 * hbond_LJ["rep"]
hbond_LJ["c6"] = 0.25 * hbond_LJ["att"]
catpi_LJ = basic_LJ[catpi_mask].copy()
catpi_LJ["c12"] = 0.20 * catpi_LJ["rep"]
catpi_LJ["c6"] = 0.20 * catpi_LJ["att"]
basic_LJ = pd.concat([oxygen_LJ, hbond_LJ, hydrophobic_LJ, catpi_LJ, bb_LJ])
# oxygen_LJ = basic_LJ[oxygen_mask].copy()
# oxygen_LJ["c12"] *= 11.4
# oxygen_LJ["c6"] = 0.0
# hydrophobic_LJ = basic_LJ[hydrophobic_mask].copy()
# hydrophobic_LJ["c12"] = 0.25 * hydrophobic_LJ["rep"]
# hydrophobic_LJ["c6"] = 0.25 * hydrophobic_LJ["att"]
# hydrophobic_w_LJ = basic_LJ[hydrophobic_w_mask].copy()
# hydrophobic_w_LJ["c12"] = 0.12 * hydrophobic_w_LJ["rep"]
# hydrophobic_w_LJ["c6"] = 0.12 * hydrophobic_w_LJ["att"]
# bb_LJ = basic_LJ[bb_mask].copy()
# bb_LJ["c12"] = 0.275 * bb_LJ["rep"]
# bb_LJ["c6"] = 0.275 * bb_LJ["att"]
# hbond_LJ = basic_LJ[hbond_mask].copy()
# hbond_LJ["c12"] = 0.275 * hbond_LJ["rep"]
# hbond_LJ["c6"] = 0.275 * hbond_LJ["att"]
# catpi_LJ = basic_LJ[catpi_mask].copy()
# catpi_LJ["c12"] = 0.20 * catpi_LJ["rep"]
# catpi_LJ["c6"] = 0.20 * catpi_LJ["att"]
# basic_LJ = pd.concat([oxygen_LJ, hbond_LJ, hydrophobic_LJ, hydrophobic_w_LJ, catpi_LJ, bb_LJ])
basic_LJ["intra_domain"] = True
basic_LJ["rep"] = basic_LJ["c12"]
basic_LJ = basic_LJ.loc[(basic_LJ["c6"] == 0.0) | ((basic_LJ["c6"] ** 2 / (4.0 * basic_LJ["c12"])) > args.epsilon_min)]
basic_LJ["residue_ai"] = basic_LJ["ai"].apply(get_residue_number)
basic_LJ["residue_aj"] = basic_LJ["aj"].apply(get_residue_number)
basic_LJ.loc[oxygen_mask, "c12"] *= 11.4
basic_LJ.loc[oxygen_mask, "c6"] = 0.0
basic_LJ.loc[~oxygen_mask, "c12"] = 0.195 * basic_LJ["rep"]
basic_LJ.loc[~oxygen_mask, "c6"] = 0.195 * basic_LJ["att"]
basic_LJ_14 = basic_LJ.copy()
basic_LJ_14 = basic_LJ_14[(~oxygen_mask & ~ca_mask)]
basic_LJ_14 = basic_LJ_14.loc[
(basic_LJ["same_chain"]) & ((basic_LJ["residue_ai"] - basic_LJ["residue_aj"]).abs() <= 2)
]
basic_LJ = basic_LJ[~ca_mask]
basic_LJ["rep"] = basic_LJ["bare"]
basic_LJ_14["c12"] = basic_LJ_14["bare"]
basic_LJ_14["c6"] = 0.0
basic_LJ_14["rep"] = basic_LJ_14["c12"]
# basic_LJ = basic_LJ.loc[(basic_LJ["c6"] == 0.0) | ((basic_LJ["c6"] ** 2 / (4.0 * basic_LJ["c12"])) > args.epsilon_min)]
basic_LJ["index_ai"], basic_LJ["index_aj"] = basic_LJ[["index_ai", "index_aj"]].min(axis=1), basic_LJ[
["index_ai", "index_aj"]
].max(axis=1)
basic_LJ = basic_LJ.drop_duplicates(subset=["index_ai", "index_aj", "same_chain"], keep="first")
basic_LJ.sort_values(by=["index_ai", "index_aj"], inplace=True)
basic_LJ = basic_LJ.drop(["index_ai", "index_aj"], axis=1)
basic_LJ_14["index_ai"], basic_LJ_14["index_aj"] = basic_LJ_14[["index_ai", "index_aj"]].min(axis=1), basic_LJ_14[
["index_ai", "index_aj"]
].max(axis=1)
basic_LJ_14 = basic_LJ_14.drop_duplicates(subset=["index_ai", "index_aj", "same_chain"], keep="first")
basic_LJ_14.sort_values(by=["index_ai", "index_aj"], inplace=True)
basic_LJ_14 = basic_LJ_14.drop(["index_ai", "index_aj"], axis=1)

for name in meGO_ensemble["reference_matrices"].keys():
temp_basic_LJ = pd.DataFrame(columns=columns)
@@ -1218,8 +1284,24 @@ def generate_basic_LJ(meGO_ensemble, args)
basic_LJ.sort_values(by=["ai", "aj", "same_chain"], ascending=[True, True, True], inplace=True)
# Cleaning the duplicates
basic_LJ = basic_LJ.drop_duplicates(subset=["ai", "aj"], keep="first")
basic_LJ_14["probability"] = 1.0
basic_LJ_14["rc_probability"] = 1.0
basic_LJ_14["rc_threshold"] = 1.0
basic_LJ_14["md_threshold"] = 1.0
basic_LJ_14["epsilon"] = -basic_LJ_14["c12"]
basic_LJ_14.loc[basic_LJ_14["c6"] > 0, "epsilon"] = basic_LJ_14["c6"] ** 2 / (4.0 * basic_LJ_14["c12"])
basic_LJ_14["cutoff"] = 1.45 * basic_LJ_14["c12"] ** (1.0 / 12.0)
basic_LJ_14["sigma"] = basic_LJ_14["cutoff"] / (2.0 ** (1.0 / 6.0))
basic_LJ_14.loc[basic_LJ_14["c6"] > 0, "sigma"] = (basic_LJ_14["c12"] / basic_LJ_14["c6"]) ** (1 / 6)
basic_LJ_14["distance"] = basic_LJ_14["cutoff"]
basic_LJ_14["learned"] = 0
basic_LJ_14["1-4"] = "1>4"
# Sorting the pairs prioritising intermolecular interactions
basic_LJ_14.sort_values(by=["ai", "aj", "same_chain"], ascending=[True, True, True], inplace=True)
# Cleaning the duplicates
basic_LJ_14 = basic_LJ_14.drop_duplicates(subset=["ai", "aj"], keep="first")

return basic_LJ
return basic_LJ, basic_LJ_14


def set_epsilon(meGO_LJ):
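
The new basic_LJ_14 table derives its pair parameters from the combined c6/c12 values: per-pair c6 and c12 are geometric means of the per-atom-type values, purely repulsive pairs carry epsilon = -c12, and attractive pairs use epsilon = c6**2 / (4 * c12) with sigma = (c12 / c6)**(1/6), while the cutoff is 1.45 * c12**(1/12). A short self-contained check of these relations, using illustrative numbers rather than actual force-field parameters:

import numpy as np

# Illustrative per-atom-type parameters (assuming GROMACS-style c6/c12 units)
c6_i, c12_i = 2.0e-3, 1.0e-6
c6_j, c12_j = 1.0e-3, 4.0e-6

# Geometric-mean combination, matching the np.sqrt(x * x[:, np.newaxis]) pattern above
c6 = np.sqrt(c6_i * c6_j)
c12 = np.sqrt(c12_i * c12_j)

# Lennard-Jones well depth, size and cutoff from c6/c12 (meaningful only when c6 > 0)
epsilon = c6**2 / (4.0 * c12)
sigma = (c12 / c6) ** (1.0 / 6.0)
cutoff = 1.45 * c12 ** (1.0 / 12.0)

print(f"c6={c6:.3e}  c12={c12:.3e}  epsilon={epsilon:.3f}  sigma={sigma:.3f} nm  cutoff={cutoff:.3f} nm")
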
61 changes: 56 additions & 5 deletions tools/box_concentration/get_box.py
@@ -1,13 +1,64 @@
import argparse
import numpy as np

if __name__ == "__main__":
parser = argparse.ArgumentParser(description="TODO!")
parser.add_argument("--n_mol", type=int, required=True, help="Number of molecules to simulate.")
parser.add_argument("--conc", type=float, required=True, help="Concentration in Molar")
parser = argparse.ArgumentParser(
description="Calculate number of moles, concentration, box size or volume based on provided parameters."
)
parser.add_argument(
"--n_mol",
type=int,
required=False,
help="Number of molecules, to be combined with --conc or one from (--volume, --sphere_r, --cubic_side)",
)
parser.add_argument(
"--conc",
type=float,
required=False,
help="Concentration in Molar, to be combined with --n_mol or one from (--volume, --sphere_r, --cubic_side)",
)
parser.add_argument("--volume", type=float, required=False, help="Volume in nm^3, to be combined with --n_mol or --conc")
parser.add_argument("--sphere_r", type=float, required=False, help="Radius of a sphere in nm")
parser.add_argument("--cubic_side", type=float, required=False, help="Side of a cube in nm")

args = parser.parse_args()

# Print help if no arguments are provided
if not any(vars(args).values()):
parser.print_help()
exit(0)

n_mol = args.n_mol
conc = args.conc
box = 10**8 * (n_mol / (conc * 6.022 * 10**23)) ** (1 / 3)
print("cubic box of side: %.5f nm" % box)
volume = args.volume
sphere_r = args.sphere_r
cubic_side = args.cubic_side

if n_mol is not None and n_mol > 0 and conc is not None and conc > 0:
box = 10**8 * (n_mol / (conc * 6.022 * 10**23)) ** (1 / 3)
print("cubic box of side: %.5f nm" % box)
exit()

if sphere_r is not None and sphere_r > 0:
volume = (4 / 3) * np.pi * sphere_r**3

if cubic_side is not None and cubic_side > 0:
volume = cubic_side**3

if n_mol is not None and n_mol > 0 and volume is not None and volume > 0:
conc = 10**24 * (n_mol / (volume * 6.022 * 10**23))
print("concentration is: %.12f M" % conc)
exit()

if conc is not None and conc > 0 and volume is not None and volume > 0:
n_mol = int(round((volume * conc * 6.022 * 10**23) / 10**24))
print("n_mol is: %i" % n_mol)
exit()

if sphere_r is not None and sphere_r > 0:
volume = (4 / 3) * np.pi * sphere_r**3
side = volume ** (1 / 3)
print("volume is %.6f nm^3 and cubic side would be %.6f nm" % (volume, side))
exit()

parser.print_help()
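
The rewritten get_box.py accepts any sufficient combination of inputs: --n_mol with --conc prints the cubic box side, --n_mol with a volume (given directly or via --sphere_r or --cubic_side) prints the concentration, --conc with a volume prints the number of molecules, and --sphere_r alone prints the sphere volume and the equivalent cubic side. The unit conversions it relies on are sketched below with illustrative inputs (1 L = 10^24 nm^3, N_A = 6.022e23):

import numpy as np

N_A = 6.022e23        # Avogadro's number
NM3_PER_L = 1.0e24    # 1 litre = 10^24 nm^3

# Illustrative inputs: 2 molecules at 5 mM
n_mol, conc = 2, 0.005

# Cubic box side in nm; equivalent to the script's 10**8 * (n_mol / (conc * 6.022 * 10**23)) ** (1 / 3)
side = (NM3_PER_L * n_mol / (conc * N_A)) ** (1 / 3)
print("cubic box of side: %.5f nm" % side)

# Round trip: concentration in M from n_mol and a box volume in nm^3
volume = side**3
print("concentration is: %.12f M" % (NM3_PER_L * n_mol / (volume * N_A)))

# Sphere of equal volume, inverting volume = (4/3) * pi * r^3
radius = (3.0 * volume / (4.0 * np.pi)) ** (1 / 3)
print("equivalent sphere radius: %.5f nm" % radius)
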
2 changes: 1 addition & 1 deletion tools/cmdata/main.cpp
@@ -40,7 +40,7 @@ int main(int argc, const char** argv)
{"mol_cutoff", '\0', POPT_ARG_DOUBLE | POPT_ARGFLAG_OPTIONAL, &mol_cutoff, 0, "Molecule cutoff distance", "DOUBLE"},
{"nskip", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &nskip, 0, "Number of frames to skip", "INT"},
{"num_threads", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &num_threads, 0, "Number of threads", "INT"},
{"num_threads", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &mol_threads, 0, "Number of molecule threads", "INT"},
{"mol_threads", '\0', POPT_ARG_INT | POPT_ARGFLAG_OPTIONAL, &mol_threads, 0, "Number of molecule threads", "INT"},
{"mode", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_mode, 0, "Mode of operation", "STRING"},
{"weights", '\0', POPT_ARG_STRING | POPT_ARGFLAG_OPTIONAL, &p_weights_path, 0, "Weights file", "FILE"},
{"no_pbc", '\0', POPT_ARG_NONE | POPT_ARGFLAG_OPTIONAL, &p_nopbc, 0, "Ignore pbcs", 0},
