From 1b7e02e81ff22ac62a1f0f1323f985a7672b186c Mon Sep 17 00:00:00 2001
From: Wei-Tse Hsu <wehs7661@colorado.edu>
Date: Mon, 17 Jul 2023 21:46:04 -0500
Subject: [PATCH] Added the YAML parameter mdp_args to enable differing
 parameters across replicas

---
 docs/simulations.rst                   |  9 +++++-
 ensemble_md/ensemble_EXE.py            | 38 ++++++++++++++++++++++++--
 ensemble_md/tests/test_ensemble_EXE.py |  3 --
 ensemble_md/utils/utils.py             |  2 +-
 4 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/docs/simulations.rst b/docs/simulations.rst
index b25833b7..7aacefeb 100644
--- a/docs/simulations.rst
+++ b/docs/simulations.rst
@@ -196,7 +196,7 @@ status of the previous iteration.
 This means:
 
 * For each replica, the input configuration for initializing a new iterations should be the output configuraiton of the previous iteration. For example, if the final configurations are represented by :code:`[1, 2, 0, 3]` (returned by :obj:`.get_swapped_configs`), then in the next iteration, replica 0 should be initialized by the output configuration of replica 1 in the previous iteration, while replica 3 can just inherit the output configuration from previous iteration of the same replica. Notably, instead of exchanging the MDP files, we recommend swapping out the coordinate files to exchange replicas.
-* For each replica, the MDP file for the new iteration should be the same as the one used in the previous iteartion of the same replica except that parameters like :code:`tinit`, :code:`init-lambda-state`, :code:`init-wl-delta`, and :code:`init-lambda-weights` should be modified to the final values in the previous iteration. This can be done by :class:`.gmx_parser.MDP` and :obj:`.update_MDP`.
+* For each replica, the MDP file for the new iteration should be the same as the one used in the previous iteration of the same replica except that parameters like :code:`tinit`, :code:`init_lambda_state`, :code:`init_wl_delta`, and :code:`init_lambda_weights` should be modified to the final values in the previous iteration. This can be done by :class:`.gmx_parser.MDP` and :obj:`.update_MDP`.
 
 Step 4: Run the new iteration
 -----------------------------
@@ -287,6 +287,13 @@ include parameters for data analysis here.
   - :code:`n_ex`: (Optional, Default: 1)
       The number of attempts swap during an exchange interval. This option is only relevant if the option :code:`proposal` is :code:`multiple`.
       Otherwise, this option is ignored. For more details, please refer to :ref:`doc_multiple_swaps`.
+  - :code:`mdp_args`: (Optional, Default: :code:`None`)
+      MDP parameters differing across replicas provided in a dictionary. For each key in the dictionary, the value should
+      always be a list whose length equals the number of replicas. For example, :code:`{'ref_p': [1.0, 1.01, 1.02, 1.03]}` means that the
+      MDP parameter :code:`ref_p` will be set as 1.0 bar, 1.01 bar, 1.02 bar, and 1.03 bar for replicas 0, 1, 2, and 3, respectively.
+      Note that while this feature allows high flexibility in parameter specification, not all parameters are suitable to be
+      varied across replicas. For example, varying :code:`nsteps` across replicas for synchronous EEXE simulations does not make sense.
+      Additionally, this feature is a work in progress, and differing :code:`ref_t` or :code:`dt` across replicas might cause issues.
   - :code:`grompp_args`: (Optional: Default: :code:`None`)
       Additional arguments to be appended to the GROMACS :code:`grompp` command provided in a dictionary.
       For example, one could have :code:`{'-maxwarn', '1'}` to specify the :code:`maxwarn` argument for the :code:`grompp` command.
diff --git a/ensemble_md/ensemble_EXE.py b/ensemble_md/ensemble_EXE.py
index d9fae3d3..41a25982 100644
--- a/ensemble_md/ensemble_EXE.py
+++ b/ensemble_md/ensemble_EXE.py
@@ -165,6 +165,7 @@ def set_params(self, analysis):
             "N_cutoff": 1000,
             "n_ex": 'N^3',   # only active for multiple swaps.
             "verbose": True,
+            "mdp_args": None,
             "grompp_args": None,
             "runtime_args": None,
             "n_ckpt": 100,
@@ -253,6 +254,17 @@ def set_params(self, analysis):
                     if not isinstance(item, int) or item < 0:
                         raise ParameterError("Each number specified in 'add_swappables' should be a non-negative integer.")  # noqa: E501
 
+        # Validate mdp_args: every key must be an underscore-style string with one value per replica.
+        if self.mdp_args is not None:
+            for key in self.mdp_args.keys():
+                if not isinstance(key, str):
+                    raise ParameterError("All keys specified in 'mdp_args' should be strings.")
+                elif '-' in key:
+                    raise ParameterError("Parameters specified in 'mdp_args' must use underscores in place of hyphens.")  # noqa: E501
+            for val_list in self.mdp_args.values():
+                if len(val_list) != self.n_sim:
+                    raise ParameterError("The number of values specified for each key in 'mdp_args' should be the same as the number of replicas.")  # noqa: E501
+
         # Step 5: Reformat the input MDP file to replace all hypens with underscores.
         self.reformat_MDP(self.mdp)
 
@@ -292,9 +304,6 @@ def set_params(self, analysis):
         if 'gen_seed' in self.template and self.template['gen_seed'] != -1:
             self.warnings.append('Warning: We recommend setting gen_seed as -1 so the random seed is different for each iteration.')  # noqa: E501
 
-        if 'symmetrized_transition_matrix' in self.template and self.template['symmetrized_transition_matrix'] == 'yes':  # noqa: E501
-            self.warnings.append('Warning: We recommend setting symmetrized-transition-matrix to no instead of yes.')
-
         if self.nst_sim % self.template['nstlog'] != 0:
             raise ParameterError(
                 'The parameter "nstlog" must be a factor of the parameter "nst_sim" specified in the YAML file.')
@@ -307,6 +316,25 @@ def set_params(self, analysis):
             raise ParameterError(
                 'In EEXE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios might be wrong.')  # noqa: E501
 
+        if self.mdp_args is not None:
+            # Seeds are given per replica: warn as soon as any replica uses a fixed (non -1) seed.
+            if 'lmc_seed' in self.mdp_args and any(seed != -1 for seed in self.mdp_args['lmc_seed']):
+                self.warnings.append('Warning: We recommend setting lmc_seed as -1 so the random seed is different for each iteration.')  # noqa: E501
+            if 'gen_seed' in self.mdp_args and any(seed != -1 for seed in self.mdp_args['gen_seed']):
+                self.warnings.append('Warning: We recommend setting gen_seed as -1 so the random seed is different for each iteration.')  # noqa: E501
+
+            if 'nstlog' in self.mdp_args and sum(self.nst_sim % np.array(self.mdp_args['nstlog'])) != 0:
+                raise ParameterError(
+                    'The parameter "nstlog" must be a factor of the parameter "nst_sim" specified in the YAML file.')
+
+            if 'nstdhdl' in self.mdp_args and sum(self.nst_sim % np.array(self.mdp_args['nstdhdl'])) != 0:
+                raise ParameterError(
+                    'The parameter "nstdhdl" must be a factor of the parameter "nst_sim" specified in the YAML file.')
+
+            if 'nstexpanded' in self.mdp_args and 'nstdhdl' in self.mdp_args and sum(np.array(self.mdp_args['nstexpanded']) % np.array(self.mdp_args['nstdhdl'])) != 0:  # noqa: E501
+                raise ParameterError(
+                    'In EEXE, the parameter "nstdhdl" must be a factor of the parameter "nstexpanded", or the calculation of acceptance ratios might be wrong.')  # noqa: E501
+
         # Step 7: Set up derived parameters
         # 7-1. kT in kJ/mol
         self.kT = k * NA * self.temp / 1000  # 1 kT in kJ/mol
@@ -492,6 +520,10 @@ def initialize_MDP(self, idx):
         MDP = copy.deepcopy(self.template)
         MDP["nsteps"] = self.nst_sim
 
+        if self.mdp_args is not None:  # apply the values given for replica `idx` on top of the template copy
+            for param, replica_values in self.mdp_args.items():
+                MDP[param] = replica_values[idx]
+
         start_idx = idx * self.s
         for i in self.lambda_types:
             MDP[i] = self.template[i][start_idx:start_idx + self.n_sub]
diff --git a/ensemble_md/tests/test_ensemble_EXE.py b/ensemble_md/tests/test_ensemble_EXE.py
index 8e907912..3afe445f 100644
--- a/ensemble_md/tests/test_ensemble_EXE.py
+++ b/ensemble_md/tests/test_ensemble_EXE.py
@@ -137,7 +137,6 @@ def test_set_params_warnings(self, params_dict):
         mdp = gmx_parser.MDP(os.path.join(input_path, "expanded.mdp"))  # A perfect mdp file
         mdp['lmc_seed'] = 1000
         mdp['gen_seed'] = 1000
-        mdp['symmetrized_transition_matrix'] = 'yes'
         mdp['wl_scale'] = ''
         mdp.write(os.path.join(input_path, "expanded_test.mdp"))
 
@@ -148,11 +147,9 @@ def test_set_params_warnings(self, params_dict):
         warning_1 = 'Warning: The histogram correction/weight combination method is specified but will not be used since the weights are fixed.'  # noqa: E501
         warning_2 = 'Warning: We recommend setting lmc_seed as -1 so the random seed is different for each iteration.'
         warning_3 = 'Warning: We recommend setting gen_seed as -1 so the random seed is different for each iteration.'
-        warning_4 = 'Warning: We recommend setting symmetrized-transition-matrix to no instead of yes.'
         assert warning_1 in EEXE.warnings
         assert warning_2 in EEXE.warnings
         assert warning_3 in EEXE.warnings
-        assert warning_4 in EEXE.warnings
 
         os.remove(os.path.join(input_path, "expanded_test.mdp"))
 
diff --git a/ensemble_md/utils/utils.py b/ensemble_md/utils/utils.py
index b0fab704..85fb2252 100644
--- a/ensemble_md/utils/utils.py
+++ b/ensemble_md/utils/utils.py
@@ -117,7 +117,7 @@ def format_time(t):
 
     Returns
     -------
-    t_str : sttr
+    t_str : str
         A string in the format of "XX day XX hour(s) XX minute(s) XX second(s)".
     """
     hh_mm_ss = str(datetime.timedelta(seconds=t)).split(":")