diff --git a/CHANGES b/CHANGES index e4d391ea..a882a67e 100644 --- a/CHANGES +++ b/CHANGES @@ -45,6 +45,9 @@ Fixes - substitute the any_none() function with a check "if None in" in the AMBER parser (issue #236, PR #237) - For ABFE workflow, dHdl will only be read when a TI estimator is chosen. Similarly, u_nk will only be read when FEP estimators are chosen. (PR #231) + - All parsers now have a 'extract(file, T)' method that returns a dict with both + "dHdl" and "u_nk" data (or None). THe AMBER parser when using this function will read the + file just once, extracting all data at once. (issue #222, PR #240) 07/22/2022 xiki-tempula, IAlibay, dotsdl, orbeckst, ptmerz diff --git a/docs/api_principles.rst b/docs/api_principles.rst index 8fc6b613..1702a58c 100644 --- a/docs/api_principles.rst +++ b/docs/api_principles.rst @@ -69,7 +69,7 @@ The library is structured as follows, following a similar style to │   ├── abfe.py -* The :mod:`~alchemlyb.parsing` submodule contains parsers for individual MD engines, since the output files needed to perform alchemical free energy calculations vary widely and are not standardized. Each module at the very least provides an `extract_u_nk` function for extracting reduced potentials (needed for MBAR), as well as an `extract_dHdl` function for extracting derivatives required for thermodynamic integration. Other helper functions may be exposed for additional processing, such as generating an XVG file from an EDR file in the case of GROMACS. All `extract\_*` functions take similar arguments (a file path, parameters such as temperature), and produce standard outputs (:class:`pandas.DataFrame` for reduced potentials, :class:`pandas.Series` for derivatives). +* The :mod:`~alchemlyb.parsing` submodule contains parsers for individual MD engines, since the output files needed to perform alchemical free energy calculations vary widely and are not standardized. Each module at the very least provides an `extract_u_nk` function for extracting reduced potentials (needed for MBAR), as well as an `extract_dHdl` function for extracting derivatives required for thermodynamic integration. Moreover an `extract`` functon is provided, which returns a dict containing both derivatives and reduced potentials. Other helper functions may be exposed for additional processing, such as generating an XVG file from an EDR file in the case of GROMACS. All `extract\_*` functions take similar arguments (a file path, parameters such as temperature), and produce standard outputs (:class:`pandas.DataFrame` for reduced potentials, :class:`pandas.Series` for derivatives). * The :mod:`~alchemlyb.preprocessing` submodule features functions for subsampling timeseries, as may be desired before feeding them to an estimator. So far, these are limited to `slicing`, `statistical_inefficiency`, and `equilibrium_detection` functions, many of which make use of subsampling schemes available from :mod:`pymbar`. These functions are written in such a way that they can be easily composed as parts of complex processing pipelines. * The :mod:`~alchemlyb.estimators` module features classes *a la* **scikit-learn** that can be initialized with parameters that determine their behavior and then "trained" on a `fit` method. MBAR, BAR, and thermodynamic integration (TI) as the major methods are all implemented. Correct error estimates require the use of time series with independent samples. * The :mod:`~alchemlyb.convergence` submodule features convenience functions/classes for doing convergence analysis using a given dataset and a chosen estimator. diff --git a/docs/parsing/alchemlyb.parsing.amber.rst b/docs/parsing/alchemlyb.parsing.amber.rst index c297b888..53a61686 100644 --- a/docs/parsing/alchemlyb.parsing.amber.rst +++ b/docs/parsing/alchemlyb.parsing.amber.rst @@ -18,3 +18,4 @@ This submodule includes these parsing functions: .. autofunction:: alchemlyb.parsing.amber.extract_dHdl .. autofunction:: alchemlyb.parsing.amber.extract_u_nk +.. autofunction:: alchemlyb.parsing.amber.extract diff --git a/docs/parsing/alchemlyb.parsing.gmx.rst b/docs/parsing/alchemlyb.parsing.gmx.rst index 0c875f56..4f4da027 100644 --- a/docs/parsing/alchemlyb.parsing.gmx.rst +++ b/docs/parsing/alchemlyb.parsing.gmx.rst @@ -37,3 +37,4 @@ This submodule includes these parsing functions: .. autofunction:: alchemlyb.parsing.gmx.extract_dHdl .. autofunction:: alchemlyb.parsing.gmx.extract_u_nk +.. autofunction:: alchemlyb.parsing.gmx.extract diff --git a/docs/parsing/alchemlyb.parsing.gomc.rst b/docs/parsing/alchemlyb.parsing.gomc.rst index 200d4a29..51291c9b 100644 --- a/docs/parsing/alchemlyb.parsing.gomc.rst +++ b/docs/parsing/alchemlyb.parsing.gomc.rst @@ -13,3 +13,4 @@ This submodule includes these parsing functions: .. autofunction:: alchemlyb.parsing.gomc.extract_dHdl .. autofunction:: alchemlyb.parsing.gomc.extract_u_nk +.. autofunction:: alchemlyb.parsing.gomc.extract diff --git a/docs/parsing/alchemlyb.parsing.namd.rst b/docs/parsing/alchemlyb.parsing.namd.rst index 9e71e519..d43a5cda 100644 --- a/docs/parsing/alchemlyb.parsing.namd.rst +++ b/docs/parsing/alchemlyb.parsing.namd.rst @@ -31,3 +31,4 @@ API Reference This submodule includes these parsing functions: .. autofunction:: alchemlyb.parsing.namd.extract_u_nk +.. autofunction:: alchemlyb.parsing.namd.extract diff --git a/src/alchemlyb/parsing/__init__.py b/src/alchemlyb/parsing/__init__.py index 60b0f34e..60165ac9 100644 --- a/src/alchemlyb/parsing/__init__.py +++ b/src/alchemlyb/parsing/__init__.py @@ -14,3 +14,20 @@ def wrapper(outfile, T, *args, **kwargs): dataframe.attrs['energy_unit'] = 'kT' return dataframe return wrapper + + +def _init_attrs_dict(func): + '''Add temperature and energy units to the parsed dataframes. + + The temperature is added to the dataframe as dataframe.attrs['temperature'] + and the energy unit is initiated as dataframe.attrs['energy_unit'] = 'kT'. + ''' + @wraps(func) + def wrapper(outfile, T, *args, **kwargs): + dict_with_df = func(outfile, T, *args, **kwargs) + for k in dict_with_df.keys(): + if dict_with_df[k] is not None: + dict_with_df[k].attrs['temperature'] = T + dict_with_df[k].attrs['energy_unit'] = 'kT' + return dict_with_df + return wrapper diff --git a/src/alchemlyb/parsing/amber.py b/src/alchemlyb/parsing/amber.py index a8e4e723..a674c83a 100644 --- a/src/alchemlyb/parsing/amber.py +++ b/src/alchemlyb/parsing/amber.py @@ -1,6 +1,6 @@ """Parsers for extracting alchemical data from `Amber `_ output files. -Most of the file parsing parts are inherited from +Some of the file parsing parts are adapted from `alchemical-analysis`_. .. _alchemical-analysis: https://github.com/MobleyLab/alchemical-analysis @@ -14,13 +14,15 @@ import numpy as np from .util import anyopen -from . import _init_attrs +from . import _init_attrs_dict from ..postprocessors.units import R_kJmol, kJ2kcal logger = logging.getLogger("alchemlyb.parsers.Amber") k_b = R_kJmol * kJ2kcal +_FP_RE = r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?' + def convert_to_pandas(file_datum): """Convert the data structure from numpy to pandas format""" @@ -40,11 +42,6 @@ def convert_to_pandas(file_datum): return df -DVDL_COMPS = ['BOND', 'ANGLE', 'DIHED', '1-4 NB', '1-4 EEL', 'VDWAALS', - 'EELEC', 'RESTRAINT'] -_FP_RE = r'[+-]?(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?' - - def _pre_gen(it, first): """A generator that returns first first if it exists.""" @@ -58,7 +55,7 @@ def _pre_gen(it, first): return -class SectionParser(object): +class SectionParser(): """ A simple parser to extract data values from sections. """ @@ -129,14 +126,11 @@ def extract_section(self, start, end, fields, limit=None, extra='', def __iter__(self): return self - def next(self): + def __next__(self): """Read next line of the filehandle and check for EOF.""" self.lineno += 1 return next(self.fileh) - # make compatible with python 3.6 - __next__ = next - def close(self): """Close the filehandle.""" self.fileh.close() @@ -148,11 +142,11 @@ def __exit__(self, typ, value, traceback): self.close() -class FEData(object): +class FEData(): """A simple struct container to collect data from individual files.""" __slots__ = ['clambda', 't0', 'dt', 'T', 'ntpr', 'gradients', - 'component_gradients', 'mbar_energies', + 'mbar_energies', 'have_mbar', 'mbar_lambdas', 'mbar_lambda_idx'] def __init__(self): @@ -162,7 +156,6 @@ def __init__(self): self.T = -1.0 self.ntpr = -1 self.gradients = [] - self.component_gradients = [] self.mbar_energies = [] self.have_mbar = False self.mbar_lambdas = [] @@ -206,13 +199,12 @@ def file_validation(outfile): mbar_ndata = int(nstlim / mbar_ndata) mbar_lambdas = _process_mbar_lambdas(secp) file_datum.mbar_lambdas = mbar_lambdas - clambda_str = '%6.4f' % clambda + clambda_str = f'{clambda:6.4f}' if clambda_str not in mbar_lambdas: logger.warning('WARNING: lamba %s not contained in set of ' 'MBAR lambas: %s\nNot using MBAR.', clambda_str, ', '.join(mbar_lambdas)) - have_mbar = False else: mbar_nlambda = len(mbar_lambdas) @@ -240,9 +232,9 @@ def file_validation(outfile): file_datum.have_mbar = have_mbar return file_datum -@_init_attrs -def extract_u_nk(outfile, T): - """Return reduced potentials `u_nk` from Amber outputfile. +@_init_attrs_dict +def extract(outfile, T): + """Return reduced potentials `u_nk` and gradients `dH/dl` from Amber outputfile. Parameters ---------- @@ -254,20 +246,20 @@ def extract_u_nk(outfile, T): Returns ------- - u_nk : DataFrame - Reduced potential for each alchemical state (k) for each frame (n). + Dict + A dictionary with keys of 'u_nk', which is a pandas DataFrame of reduced potentials for each + alchemical state (k) for each frame (n), and 'dHdl', which is a Series of dH/dl + as a function of time for this lambda window. - .. versionchanged:: 0.5.0 - The :mod:`scipy.constants` is used for parsers instead of - the constants used by the corresponding MD engine. - + .. versionadded:: 1.0.0 """ + beta = 1/(k_b * T) file_datum = file_validation(outfile) - if not file_validation(outfile): # pragma: no cover - return None + if not file_validation(outfile): + return {"u_nk": None, "dHdl": None} if not np.isclose(T, file_datum.T, atol=0.01): msg = f'The temperature read from the input file ({file_datum.T:.2f} K)' @@ -275,40 +267,73 @@ def extract_u_nk(outfile, T): logger.error(msg) raise ValueError(msg) - if not file_datum.have_mbar: - raise Exception('ERROR: No MBAR energies found! Cannot parse file.') + finished = False with SectionParser(outfile) as secp: line = secp.skip_lines(5) high_E_cnt = 0 + nensec = 0 + old_nstep = -1 for line in secp: - if line.startswith('MBAR Energy analysis'): + if " A V E R A G E S O V E R" in line: + _ = secp.skip_after('^|=========================================') + elif line.startswith(' NSTEP'): + nstep, dvdl = secp.extract_section('^ NSTEP', '^ ---', + ['NSTEP', 'DV/DL'], + extra=line) + if nstep != old_nstep and dvdl is not None and nstep is not None: + file_datum.gradients.append(dvdl) + nensec += 1 + old_nstep = nstep + elif line.startswith('MBAR Energy analysis') and file_datum.have_mbar: mbar = secp.extract_section('^MBAR', '^ ---', file_datum.mbar_lambdas, extra=line) - - if None in mbar: + + if None in mbar: # pragma: no cover continue - - E_ref = mbar[file_datum.mbar_lambda_idx] - - for lmbda, E in enumerate(mbar): - if E > 0.0: + + reference_energy = mbar[file_datum.mbar_lambda_idx] + for lmbda, energy in enumerate(mbar): + if energy > 0.0: high_E_cnt += 1 - file_datum.mbar_energies[lmbda].append(beta * (E - E_ref)) + file_datum.mbar_energies[lmbda].append(beta * (energy - reference_energy)) + elif line == ' 5. TIMINGS\n': + finished = True + break if high_E_cnt: logger.warning('%i MBAR energ%s > 0.0 kcal/mol', high_E_cnt, 'ies are' if high_E_cnt > 1 else 'y is') - time = [file_datum.t0 + (frame_index + 1) * file_datum.dt * file_datum.ntpr - for frame_index in range(len(file_datum.mbar_energies[0]))] + if not finished: # pragma: no cover + logger.warning('WARNING: file %s is a prematurely terminated run' % outfile) + + if file_datum.have_mbar: + mbar_time = [ + file_datum.t0 + (frame_index + 1) * file_datum.dt * file_datum.ntpr + for frame_index in range(len(file_datum.mbar_energies[0]))] + + mbar_df = pd.DataFrame( + file_datum.mbar_energies, + index=np.array(file_datum.mbar_lambdas, dtype=np.float64), + columns=pd.MultiIndex.from_arrays( + [mbar_time, np.repeat(file_datum.clambda, len(mbar_time))], names=['time', 'lambdas']) + ).T + else: + logger.info('WARNING: No MBAR energies found! "u_nk" entry will be None') + mbar_df = None + + if not nensec: # pragma: no cover + logger.warning('WARNING: File %s does not contain any dV/dl data' % outfile) + dHdl_df = None + else: + logger.info('Read %s DV/DL data points in file %s' % (nensec, outfile)) + dHdl_df = convert_to_pandas(file_datum) + dHdl_df['dHdl'] *= beta + + return {"u_nk": mbar_df, "dHdl": dHdl_df} - return pd.DataFrame(file_datum.mbar_energies, - columns=pd.MultiIndex.from_arrays([time, np.repeat(file_datum.clambda, len(time))], - names=['time', 'lambdas']), - index=np.array(file_datum.mbar_lambdas, dtype=np.float64)).T -@_init_attrs def extract_dHdl(outfile, T): """Return gradients ``dH/dl`` from Amber TI outputfile. @@ -330,56 +355,34 @@ def extract_dHdl(outfile, T): the constants used by the corresponding MD engine. """ - beta = 1/(k_b * T) + extracted = extract(outfile, T) + return extracted['dHdl'] - file_datum = file_validation(outfile) - if not file_validation(outfile): - return None - if not np.isclose(T, file_datum.T, atol=0.01): - msg = f'The temperature read from the input file ({file_datum.T:.2f} K)' - msg += f' is different from the temperature passed as parameter ({T:.2f} K)' - logger.error(msg) - raise ValueError(msg) +def extract_u_nk(outfile, T): + """Return reduced potentials `u_nk` from Amber outputfile. - finished = False - comps = [] - with SectionParser(outfile) as secp: - line = secp.skip_lines(5) - nensec = 0 - old_nstep = -1 - into_average_section = False - for line in secp: - if 'DV/DL, AVERAGES OVER' in line \ - or " A V E R A G E S O V E R" in line: - into_average_section = True - if line.startswith(' NSTEP') and into_average_section: - _ = secp.skip_lines(1) - into_average_section = False - elif line.startswith(' NSTEP'): - nstep, dvdl = secp.extract_section('^ NSTEP', '^ ---', - ['NSTEP', 'DV/DL'], - extra=line) - if nstep != old_nstep and dvdl is not None \ - and nstep is not None: - file_datum.gradients.append(dvdl) - nensec += 1 - old_nstep = nstep - if line == ' 5. TIMINGS\n': - finished = True - break - if not finished: # pragma: no cover - logger.warning(' WARNING: prematurely terminated run') - if not nensec: # pragma: no cover - logger.warning('WARNING: File %s does not contain any DV/DL data', - outfile) - logger.info(f'Read {nensec} DV/DL data points') - # at this step we get info stored in the FEData object for a given amber out file - file_datum.component_gradients.extend(comps) - # convert file_datum to the pandas format to make it identical to alchemlyb output format - df = convert_to_pandas(file_datum) - df['dHdl'] *= beta - return df + Parameters + ---------- + outfile : str + Path to Amber .out file to extract data from. + T : float + Temperature in Kelvin at which the simulations were performed; + needed to generated the reduced potential (in units of kT) + + Returns + ------- + u_nk : DataFrame + Reduced potential for each alchemical state (k) for each frame (n). + + + .. versionchanged:: 0.5.0 + The :mod:`scipy.constants` is used for parsers instead of + the constants used by the corresponding MD engine. + + """ + extracted = extract(outfile, T) + return extracted['u_nk'] def _process_mbar_lambdas(secp): @@ -410,7 +413,7 @@ def _process_mbar_lambdas(secp): if 'total' in line: data = line.split() mbar_lambdas.extend(data[2:]) - else: + else: # pragma: no cover mbar_lambdas.extend(line.split()) return mbar_lambdas diff --git a/src/alchemlyb/parsing/gmx.py b/src/alchemlyb/parsing/gmx.py index b1540765..00267c66 100644 --- a/src/alchemlyb/parsing/gmx.py +++ b/src/alchemlyb/parsing/gmx.py @@ -243,6 +243,36 @@ def extract_dHdl(xvg, T, filter=True): return dHdl +def extract(xvg, T, filter=True): + r"""Return reduced potentials `u_nk` and gradients `dH/dl` + from a Hamiltonian differences XVG file. + + Parameters + ---------- + xvg : str + Path to XVG file to extract data from. + T : float + Temperature in Kelvin the simulations sampled. + filter : bool + Filter out the lines that cannot be parsed. + Such as rows with incorrect number of Columns and incorrectly + formatted numbers (e.g. 123.45.67, nan or -). + + Returns + ------- + Dict + A dictionary with keys of 'u_nk', which is a pandas DataFrame of + potential energy for each alchemical state (k) for each frame (n), + and 'dHdl', which is a Series of dH/dl + as a function of time for this lambda window. + + + .. versionadded:: 1.0.0 + """ + + return {"u_nk": extract_u_nk(xvg, T, filter), "dHdl": extract_dHdl(xvg, T, filter)} + + def _extract_state(xvg, headers=None): """Extract information on state sampled, names of lambdas. diff --git a/src/alchemlyb/parsing/gomc.py b/src/alchemlyb/parsing/gomc.py index 1944b2a4..7cf03af4 100644 --- a/src/alchemlyb/parsing/gomc.py +++ b/src/alchemlyb/parsing/gomc.py @@ -149,6 +149,36 @@ def extract_dHdl(filename, T): return dHdl +def extract(filename, T): + r"""Return reduced potentials `u_nk` and gradients `dH/dl` + from a Hamiltonian differences free energy file. + + Parameters + ---------- + xvg : str + Path to free energy file to extract data from. + T : float + Temperature in Kelvin the simulations sampled. + filter : bool + Filter out the lines that cannot be parsed. + Such as rows with incorrect number of Columns and incorrectly + formatted numbers (e.g. 123.45.67, nan or -). + + Returns + ------- + Dict + A dictionary with keys of 'u_nk', which is a pandas DataFrame of + potential energy for each alchemical state (k) for each frame (n), + and 'dHdl', which is a Series of dH/dl + as a function of time for this lambda window. + + + .. versionadded:: 1.0.0 + """ + + return {"u_nk": extract_u_nk(filename, T), "dHdl": extract_dHdl(filename, T)} + + def _extract_state(filename): """Extract information on state sampled, names of lambdas. diff --git a/src/alchemlyb/parsing/namd.py b/src/alchemlyb/parsing/namd.py index 710a0786..c4181b1d 100644 --- a/src/alchemlyb/parsing/namd.py +++ b/src/alchemlyb/parsing/namd.py @@ -306,3 +306,40 @@ def extract_u_nk(fep_files, T): u_nk.set_index(['time','fep-lambda'], inplace=True) return u_nk + +def extract(fep_files, T): + """Return reduced potentials `u_nk` from NAMD fepout file(s). + + Parameters + ---------- + fep_file : str or list of str + Path to fepout file(s) to extract data from. These are sorted by filename, + not including the path, prior to processing, using natural-sort. This way, + filenames including numbers without leading zeros are handled intuitively. + + Windows may be split across files, or more than one window may be present + in a given file. Windows without footer lines (which may be in a different + file than the respective header lines) will raise an error. This means that + while windows may have been interrupted and restarted, they must be + complete. Lambda values are expected to increase or decrease monotonically, + and match between header and footer of each window. + + T : float + Temperature in Kelvin at which the simulation was sampled. + + Returns + ------- + Dict + A dictionary with keys of 'u_nk', which is a pandas DataFrame of + potential energy for each alchemical state (k) for each frame (n). + + Note + ---- + If the number of forward and backward samples in a given window are different, + the extra sample(s) will be discarded. This is typically zero or one sample. + + + .. versionadded:: 1.0.0 + """ + + return {"u_nk": extract_u_nk(fep_files, T)} # NOTE: maybe we should also have 'dHdl': None diff --git a/src/alchemlyb/postprocessors/units.py b/src/alchemlyb/postprocessors/units.py index a0cdec77..f5e1984d 100644 --- a/src/alchemlyb/postprocessors/units.py +++ b/src/alchemlyb/postprocessors/units.py @@ -1,5 +1,6 @@ -'''Unit conversion and constants -================================''' +""" +Unit conversion and constants +""" from scipy.constants import R, calorie diff --git a/src/alchemlyb/tests/parsing/test_amber.py b/src/alchemlyb/tests/parsing/test_amber.py index afeb5d13..e1b24a2d 100644 --- a/src/alchemlyb/tests/parsing/test_amber.py +++ b/src/alchemlyb/tests/parsing/test_amber.py @@ -2,12 +2,12 @@ """ import pytest -import logging from numpy.testing import assert_allclose from alchemlyb.parsing.amber import extract_dHdl from alchemlyb.parsing.amber import extract_u_nk from alchemlyb.parsing.amber import file_validation +from alchemlyb.parsing.amber import extract from alchemtest.amber import load_simplesolvated from alchemtest.amber import load_invalidfiles from alchemtest.amber import load_bace_example @@ -28,7 +28,7 @@ def fixture_invalid_file(request): @pytest.fixture(name="single_u_nk", scope="module") def fixture_single_u_nk(): """return a single file to check u_unk parsing""" - return load_bace_example().data['solvated']['vdw'][0] + return load_bace_example().data['complex']['vdw'][0] @pytest.fixture(name="single_dHdl", scope="module") @@ -50,40 +50,47 @@ def test_dHdl_invalidfiles(invalid_file): assert extract_dHdl(invalid_file, T=298.0) is None -def test_dHdl_time_reading(single_dHdl, first_time=22.0, last_time=1020.0): +def test_dHdl_time_reading(single_dHdl): """Test if time information is read correctly when extracting dHdl""" dHdl = extract_dHdl(single_dHdl, T=298.0) - assert_allclose(dHdl.index.values[0][0], first_time) - assert_allclose(dHdl.index.values[-1][0], last_time) + assert_allclose(dHdl.index.values[0][0], 22.0) + assert_allclose(dHdl.index.values[-1][0], 1020.0) -def test_u_nk_time_reading(single_u_nk, first_time=22.0, last_time=1020.0): +def test_u_nk_time_reading(single_u_nk): """Test if time information is read correctly when extracting u_nk""" u_nk = extract_u_nk(single_u_nk, T=298.0) - assert_allclose(u_nk.index.values[0][0], first_time) - assert_allclose(u_nk.index.values[-1][0], last_time) + assert_allclose(u_nk.index.values[0][0], 22.0) + assert_allclose(u_nk.index.values[-1][0], 1020.0) -def test_wrong_T_should_raise_warning_in_extract_dHdl(single_dHdl, T=300.0): - """ - Test if calling extract_dHdl with differnt T from what's - read from the AMBER file gives a warning - """ - with pytest.raises( - ValueError, - match="is different from the temperature passed as parameter"): - _ = extract_dHdl(single_dHdl, T=T) +def test_extract_with_both_data(single_u_nk): + """Test that dHdl and u_nk have the correct form when + extracted from files with the extract funcion.""" + df_dict = extract(single_u_nk, T=298.0) + assert df_dict['dHdl'].index.names == ('time', 'lambdas') + assert df_dict['dHdl'].shape == (500, 1) + assert df_dict['u_nk'].index.names == ('time', 'lambdas') + + +def test_extract_with_only_dhdl_data(single_dHdl): + """Test that parsing with the extract function a file + with just dHdl gives the correct results""" + df_dict = extract(single_dHdl, T=298.0) + assert df_dict['dHdl'].index.names == ('time', 'lambdas') + assert df_dict['dHdl'].shape == (500, 1) + assert df_dict['u_nk'] is None -def test_wrong_T_should_raise_warning_in_extract_u_nk(single_u_nk, T=300.0): +def test_wrong_T_should_raise_warning(single_dHdl, T=300.0): """ - Test if calling extract_u_nk with differnt T from what's + Test if calling extract with differnt T from what's read from the AMBER file gives a warning """ with pytest.raises( ValueError, match="is different from the temperature passed as parameter"): - _ = extract_u_nk(single_u_nk, T=T) + _ = extract(single_dHdl, T=T) @pytest.mark.parametrize("filename", diff --git a/src/alchemlyb/tests/parsing/test_gmx.py b/src/alchemlyb/tests/parsing/test_gmx.py index 0c3a0877..a5c6e0bc 100644 --- a/src/alchemlyb/tests/parsing/test_gmx.py +++ b/src/alchemlyb/tests/parsing/test_gmx.py @@ -5,7 +5,7 @@ import bz2 import pytest -from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk +from alchemlyb.parsing.gmx import extract_dHdl, extract_u_nk, extract from alchemtest.gmx import load_benzene from alchemtest.gmx import load_expanded_ensemble_case_1, load_expanded_ensemble_case_2, load_expanded_ensemble_case_3 from alchemtest.gmx import load_water_particle_with_total_energy @@ -199,6 +199,15 @@ def test_extract_dHdl_unit(): assert dhdl.attrs['temperature'] == 310 assert dhdl.attrs['energy_unit'] == 'kT' +def test_calling_extract(): + '''Test if the extract function is working''' + dataset = load_benzene() + df_dict = extract(dataset['data']['Coulomb'][0], 310) + assert df_dict['dHdl'].attrs['temperature'] == 310 + assert df_dict['dHdl'].attrs['energy_unit'] == 'kT' + assert df_dict['u_nk'].attrs['temperature'] == 310 + assert df_dict['u_nk'].attrs['energy_unit'] == 'kT' + class TestRobustGMX(): '''Test dropping the row that is wrong in different way''' @staticmethod diff --git a/src/alchemlyb/tests/parsing/test_gomc.py b/src/alchemlyb/tests/parsing/test_gomc.py index c03a0b6b..d61b3789 100644 --- a/src/alchemlyb/tests/parsing/test_gomc.py +++ b/src/alchemlyb/tests/parsing/test_gomc.py @@ -2,7 +2,7 @@ """ -from alchemlyb.parsing.gomc import extract_dHdl, extract_u_nk +from alchemlyb.parsing.gomc import extract_dHdl, extract_u_nk, extract from alchemtest.gomc import load_benzene @@ -30,3 +30,15 @@ def test_u_nk(): assert u_nk.index.names == ['time', 'Coulomb-lambda', 'VDW-lambda'] assert u_nk.shape == (1000, 23) +def test_extract(): + """Test that u_nk and dHdl have the correct form when extracted from files. + + """ + dataset = load_benzene() + + df_dict = extract(dataset['data'][0], T=298) + + assert df_dict['u_nk'].index.names == ['time', 'Coulomb-lambda', 'VDW-lambda'] + assert df_dict['u_nk'].shape == (1000, 23) + assert df_dict['dHdl'].index.names == ['time', 'Coulomb-lambda', 'VDW-lambda'] + assert df_dict['dHdl'].shape == (1000, 2) diff --git a/src/alchemlyb/tests/parsing/test_namd.py b/src/alchemlyb/tests/parsing/test_namd.py index d12ff65d..e624aeff 100644 --- a/src/alchemlyb/tests/parsing/test_namd.py +++ b/src/alchemlyb/tests/parsing/test_namd.py @@ -6,7 +6,7 @@ import bz2 import pytest -from alchemlyb.parsing.namd import extract_u_nk +from alchemlyb.parsing.namd import extract_u_nk, extract from alchemtest.namd import load_tyr2ala from alchemtest.namd import load_idws from alchemtest.namd import load_restarted @@ -302,6 +302,15 @@ def test_u_nk_restarted_reversed(): assert u_nk.shape == (30170, 11) +def test_extract(): + filenames = load_restarted_reversed()['data']['both'] + df_dict = extract(filenames, T=300) + + assert df_dict['u_nk'].index.names == ['time', 'fep-lambda'] + assert df_dict['u_nk'].shape == (30170, 11) + # assert df_dict['dHdl'] is None + + def test_u_nk_restarted_reversed_missing_window_header(tmp_path): """Test that u_nk has the correct form when a #NEW line is missing from the restarted_reversed dataset and the parser has to infer lambda_idws for that window.""" diff --git a/src/alchemlyb/tests/test_import.py b/src/alchemlyb/tests/test_import.py index 50bc7c8c..50a5d933 100644 --- a/src/alchemlyb/tests/test_import.py +++ b/src/alchemlyb/tests/test_import.py @@ -1,7 +1,4 @@ import alchemlyb def test_name(): - try: - assert alchemlyb.__name__ == 'alchemlyb' - except Exception as e: - raise e + assert alchemlyb.__name__ == 'alchemlyb'