Merge pull request #478 from carlocamilloni/mglob_prior

Mglob prior
multi-ego · Nov 5, 2024 · a2dd33b · a2dd33b
2 parents 705fad6 + 93c03b7
commit a2dd33b
Show file tree

Hide file tree

Showing 5 changed files with 279 additions and 348 deletions.
diff --git a/multiego.py b/multiego.py
@@ -1,7 +1,6 @@
 import argparse
 import sys
 import os
-import numpy as np
 import parmed as pmd
 import pandas as pd
 import time
@@ -111,7 +110,7 @@ def meGO_parsing():
     elif not multi_flag:
         args.names = topol_names
 
-    if args.egos != "rc" and not args.reference:
+    if args.egos == "production" and not args.reference:
         args.reference = ["reference"]
 
     if args.epsilon and not args.inter_epsilon:
@@ -128,7 +127,7 @@ def meGO_parsing():
         args.multi_epsilon_inter = {k1: {k2: args.inter_epsilon for k2 in args.names} for k1 in args.names}
 
     # check all epsilons are set and greater than epsilon_min
-    if args.egos != "rc":
+    if args.egos == "production":
         for k, v in args.multi_epsilon_intra.items():
             if v < args.epsilon_min:
                 print("ERROR: epsilon value for " + k + " is less than epsilon_min")
@@ -180,15 +179,16 @@ def main():
     """
 
     bt = time.time()
-    print("Multi-eGO\n")
     args, custom_dict = meGO_parsing()
 
     if not args.no_header:
         generate_face.print_welcome()
 
+    print(f"Multi-eGO: {args.egos}\n")
+
     print("- Checking for input files and folders")
     io.check_files_existence(args)
-    if args.egos != "rc":
+    if args.egos == "production":
         io.check_matrix_format(args)
 
     print("- Processing Multi-eGO topology")
@@ -203,7 +203,7 @@ def main():
     st = et
     print("- Done in:", elapsed_time, "seconds")
 
-    if args.egos != "rc":
+    if args.egos == "production":
         print("- Processing Multi-eGO contact matrices")
         meGO_ensembles, matrices = ensemble.init_meGO_matrices(meGO_ensembles, args, custom_dict)
         et = time.time()
@@ -231,45 +231,16 @@ def main():
         st = et
         print("- Done in:", elapsed_time, "seconds")
     else:
-        print("- Generate LJ dataset")
-        meGO_LJ, meGO_LJ_14 = ensemble.generate_basic_LJ(meGO_ensembles, args)
-        # meGO_LJ_14 = pairs14
-        meGO_LJ_14 = pd.concat([meGO_LJ_14, pairs14])
-        needed_fields = [
-            "ai",
-            "aj",
-            "type",
-            "c6",
-            "c12",
-            "sigma",
-            "epsilon",
-            "probability",
-            "rc_probability",
-            "md_threshold",
-            "rc_threshold",
-            "rep",
-            "cutoff",
-            "molecule_name_ai",
-            "molecule_name_aj",
-            "same_chain",
-            "source",
-            "number_ai",
-            "number_aj",
-        ]
-        meGO_LJ_14 = meGO_LJ_14[needed_fields]
-        meGO_LJ_14["epsilon"] = -meGO_LJ_14["c12"]
-        meGO_LJ_14.reset_index(inplace=True)
-        # Sorting the pairs prioritising intermolecular interactions
-        meGO_LJ_14.sort_values(by=["ai", "aj", "c12"], ascending=[True, True, True], inplace=True)
-        # Cleaning the duplicates
-        meGO_LJ_14 = meGO_LJ_14.drop_duplicates(subset=["ai", "aj"], keep="first")
+        print("- Generate the LJ dataset")
+        meGO_LJ = ensemble.generate_rc_LJ(meGO_ensembles)
+        meGO_LJ_14 = pairs14
         et = time.time()
         elapsed_time = et - st
         st = et
         print("- Done in:", elapsed_time, "seconds")
 
     print("- Finalize pairs and exclusions")
-    meGO_LJ_14 = ensemble.make_pairs_exclusion_topology(meGO_ensembles, meGO_LJ_14)
+    meGO_LJ_14 = ensemble.make_pairs_exclusion_topology(meGO_ensembles, meGO_LJ_14, args)
     et = time.time()
     elapsed_time = et - st
     st = et

diff --git a/src/multiego/arguments.py b/src/multiego/arguments.py
@@ -5,14 +5,11 @@
     },
     "--egos": {
         "type": str,
-        "choices": ["rc", "production"],
+        "choices": ["rc", "mg", "production"],
         "help": "rc: creates a force-field for random coil simulations. "
+        "mg: creates a force-field for molten globule simulations. "
         "production: creates a force-field combining random coil simulations and training simulations.",
     },
-    "--epsilon": {
-        "type": float,
-        "help": "Maximum interaction energy per contact. The typical range is 0.2-0.4 kJ/mol",
-    },
     "--reference": {
         "type": lambda x: x.split(","),
         "default": [],
@@ -25,13 +22,17 @@
         "help": "A list of the training simulations to be included in multi-eGO, "
         "corresponding to the subfolders to process and where the contacts are learned.",
     },
+    "--epsilon": {
+        "type": float,
+        "help": "Maximum interaction energy per contact.",
+    },
     "--inter_epsilon": {
         "type": float,
-        "help": "Maximum interaction energy per intermolecular contacts. The typical range is 0.2-0.4 kJ/mol",
+        "help": "Maximum interaction energy per intermolecular contact.",
     },
     "--inter_domain_epsilon": {
         "type": float,
-        "help": "Maximum interaction energy per interdomain contacts. The typical range is 0.2-0.4 kJ/mol",
+        "help": "Maximum interaction energy per interdomain contact.",
     },
     "--p_to_learn": {
         "type": float,