From cb965ad049556dc97445acc16cbb21a2bf3f7f38 Mon Sep 17 00:00:00 2001 From: Zhiyi Wu Date: Wed, 21 Sep 2022 17:05:44 +0100 Subject: [PATCH] ABFE workflow will not read dHdl file if no TI estimator is choosen (#231) If not explicitly set, AMBER will not generate the dHdl file. (I don't know if the u_nk will be written by default but I got a complain that workflow cannot be used to process their amber output, which doesn't have dhdl data). Currently, the ABFE workflow will attempt to read both dHdl and u_nk. This PR will make it such that if no TI estimator is chosen, the workflow will not attempt to read the dHdl. I have also removed the ignore_warnings in the read file stage as maintaining it and having it to be clever enough to solve all the dumb user issues seems to be too much. Co-authored-by: Zhiyi Wu --- CHANGES | 4 + src/alchemlyb/tests/test_workflow_ABFE.py | 28 +++++ src/alchemlyb/workflows/abfe.py | 118 ++++++++++++++-------- 3 files changed, 109 insertions(+), 41 deletions(-) diff --git a/CHANGES b/CHANGES index 25074d49..e4d391ea 100644 --- a/CHANGES +++ b/CHANGES @@ -24,6 +24,8 @@ Changes - AutoMBAR accepts the `method` argument (PR #114). - Refactor the subsampling module to unify the behaviour of equilibrium_detection and statistical_inefficiency (PR #218). + - Remove the ignore_warnings flag in ABFE workflow. (PR #231) + Enhancements - Add u_nk2series and dhdl2series to convert u_nk and dHdl to series (PR @@ -41,6 +43,8 @@ Fixes at which the simulations were run (issue #225, PR #232) - changed how int/float are read from AMBER files (issue #229, PR #235) - substitute the any_none() function with a check "if None in" in the AMBER parser (issue #236, PR #237) + - For ABFE workflow, dHdl will only be read when a TI estimator is chosen. + Similarly, u_nk will only be read when FEP estimators are chosen. (PR #231) 07/22/2022 xiki-tempula, IAlibay, dotsdl, orbeckst, ptmerz diff --git a/src/alchemlyb/tests/test_workflow_ABFE.py b/src/alchemlyb/tests/test_workflow_ABFE.py index 1ea9f14e..cbe6694b 100644 --- a/src/alchemlyb/tests/test_workflow_ABFE.py +++ b/src/alchemlyb/tests/test_workflow_ABFE.py @@ -273,6 +273,34 @@ def test_run_none(self, workflow): workflow.run(uncorr=None, estimators=None, overlap=None, breakdown=None, forwrev=None) + def test_run_single_estimator(self, workflow, monkeypatch): + monkeypatch.setattr(workflow, 'u_nk_list', []) + monkeypatch.setattr(workflow, 'dHdl_list', []) + workflow.run(uncorr=None, estimators='MBAR', overlap=None, breakdown=True, + forwrev=None) + + def test_run_invalid_estimator(self, workflow): + with pytest.raises(ValueError, + match=r'Estimator aaa is not supported.'): + workflow.run(uncorr=None, estimators='aaa', overlap=None, breakdown=None, + forwrev=None) + + @pytest.mark.parametrize('read_u_nk', [True, False]) + @pytest.mark.parametrize('read_dHdl', [True, False]) + def test_read_TI_FEP(self, workflow, monkeypatch, read_u_nk, read_dHdl): + monkeypatch.setattr(workflow, 'u_nk_list', []) + monkeypatch.setattr(workflow, 'dHdl_list', []) + workflow.read(read_u_nk, read_dHdl) + if read_u_nk: + assert len(workflow.u_nk_list) == 5 + else: + assert len(workflow.u_nk_list) == 0 + + if read_dHdl: + assert len(workflow.dHdl_list) == 5 + else: + assert len(workflow.dHdl_list) == 0 + def test_read_invalid_u_nk(self, workflow, monkeypatch): def extract_u_nk(self, T): raise IOError('Error read u_nk.') diff --git a/src/alchemlyb/workflows/abfe.py b/src/alchemlyb/workflows/abfe.py index b16c9afd..c975f665 100644 --- a/src/alchemlyb/workflows/abfe.py +++ b/src/alchemlyb/workflows/abfe.py @@ -48,8 +48,6 @@ class ABFE(WorkflowBase): outdirectory : str Directory in which the output files produced by this script will be stored. Default: os.path.curdir. - ignore_warnings : bool - Turn all errors into warnings. Attributes ---------- @@ -60,11 +58,9 @@ class ABFE(WorkflowBase): ''' def __init__(self, T, units='kT', software='GROMACS', dir=os.path.curdir, prefix='dhdl', suffix='xvg', - outdirectory=os.path.curdir, - ignore_warnings=False): + outdirectory=os.path.curdir): super().__init__(units, software, T, outdirectory) - self.ignore_warnings = ignore_warnings self.logger = logging.getLogger('alchemlyb.workflows.ABFE') self.logger.info('Initialise Alchemlyb ABFE Workflow') self.logger.info(f'Alchemlyb Version: f{__version__}') @@ -93,10 +89,17 @@ def __init__(self, T, units='kT', software='GROMACS', dir=os.path.curdir, else: raise NotImplementedError(f'{software} parser not found.') - def read(self): + def read(self, read_u_nk=True, read_dHdl=True): '''Read the u_nk and dHdL data from the :attr:`~alchemlyb.workflows.ABFE.file_list` + Parameters + ---------- + read_u_nk : bool + Whether to read the u_nk. + read_dHdl : bool + Whether to read the dHdl. + Attributes ---------- u_nk_list : list @@ -104,50 +107,65 @@ def read(self): dHdl_list : list A list of :class:`pandas.DataFrame` of dHdl. ''' + self.u_nk_sample_list = None + self.dHdl_sample_list = None + u_nk_list = [] dHdl_list = [] for file in self.file_list: - try: - u_nk = self._extract_u_nk(file, T=self.T) - self.logger.info( - f'Reading {len(u_nk)} lines of u_nk from {file}') - u_nk_list.append(u_nk) - except Exception as exc: - msg = f'Error reading u_nk from {file}.' - if self.ignore_warnings: - self.logger.exception(msg + f'\n{exc}\n' + - 'This exception is being ignored because ignore_warnings=True.') - else: + if read_u_nk: + try: + u_nk = self._extract_u_nk(file, T=self.T) + self.logger.info( + f'Reading {len(u_nk)} lines of u_nk from {file}') + u_nk_list.append(u_nk) + except Exception as exc: + msg = f'Error reading u_nk from {file}.' self.logger.error(msg) raise OSError(msg) from exc - try: - dhdl = self._extract_dHdl(file, T=self.T) - self.logger.info( - f'Reading {len(dhdl)} lines of dhdl from {file}') - dHdl_list.append(dhdl) - except Exception as exc: - msg = f'Error reading dHdl from {file}.' - if self.ignore_warnings: - self.logger.exception(msg + f'\n{exc}\n' + - 'This exception is being ignored because ignore_warnings=True.') - else: + if read_dHdl: + try: + dhdl = self._extract_dHdl(file, T=self.T) + self.logger.info( + f'Reading {len(dhdl)} lines of dhdl from {file}') + dHdl_list.append(dhdl) + except Exception as exc: + msg = f'Error reading dHdl from {file}.' self.logger.error(msg) raise OSError(msg) from exc # Sort the files according to the state - self.logger.info('Sort files according to the u_nk.') - column_names = u_nk_list[0].columns.values.tolist() - index_list = sorted(range(len(self.file_list)), - key=lambda x:column_names.index( - u_nk_list[x].reset_index('time').index.values[0])) + if read_u_nk: + self.logger.info('Sort files according to the u_nk.') + column_names = u_nk_list[0].columns.values.tolist() + index_list = sorted(range(len(self.file_list)), + key=lambda x: column_names.index( + u_nk_list[x].reset_index( + 'time').index.values[0])) + elif read_dHdl: + self.logger.info('Sort files according to the dHdl.') + index_list = sorted(range(len(self.file_list)), + key=lambda x: + dHdl_list[x].reset_index( + 'time').index.values[0]) + else: + self.u_nk_list = [] + self.dHdl_list = [] + return self.file_list = [self.file_list[i] for i in index_list] self.logger.info("Sorted file list: \n%s", '\n'.join(self.file_list)) - self.u_nk_list = [u_nk_list[i] for i in index_list] - self.dHdl_list = [dHdl_list[i] for i in index_list] - self.u_nk_sample_list = None - self.dHdl_sample_list = None + if read_u_nk: + self.u_nk_list = [u_nk_list[i] for i in index_list] + else: + self.u_nk_list = [] + + if read_dHdl: + self.dHdl_list = [dHdl_list[i] for i in index_list] + else: + self.dHdl_list = [] + def run(self, skiptime=0, uncorr='dhdl', threshold=50, estimators=('MBAR', 'BAR', 'TI'), overlap='O_MBAR.pdf', @@ -196,7 +214,24 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, :func:`~alchemlyb.convergence.forward_backward_convergence` for further explanation. ''' - self.read() + use_FEP = False + use_TI = False + + if estimators is not None: + if isinstance(estimators, str): + estimators = [estimators, ] + for estimator in estimators: + if estimator in FEP_ESTIMATORS: + use_FEP = True + elif estimator in TI_ESTIMATORS: + use_TI = True + else: + msg = f"Estimator {estimator} is not supported. Choose one from " \ + f"{FEP_ESTIMATORS + TI_ESTIMATORS}." + self.logger.error(msg) + raise ValueError(msg) + + self.read(use_FEP, use_TI) if uncorr is not None: self.preprocess(skiptime=skiptime, uncorr=uncorr, @@ -205,13 +240,14 @@ def run(self, skiptime=0, uncorr='dhdl', threshold=50, self.estimate(estimators) self.generate_result() - if overlap is not None: + if overlap is not None and use_FEP: ax = self.plot_overlap_matrix(overlap) plt.close(ax.figure) if breakdown: - ax = self.plot_ti_dhdl() - plt.close(ax.figure) + if use_TI: + ax = self.plot_ti_dhdl() + plt.close(ax.figure) fig = self.plot_dF_state() plt.close(fig) fig = self.plot_dF_state(dF_state='dF_state_long.pdf',