Skip to content

Commit

Permalink
The statistical inefficiency is logged at the debug level (#325)
Browse files Browse the repository at this point in the history
- close #295 
- log SI (at DEBUG)
- updated docs
- updated CHANGES

---------

Co-authored-by: Zhiyi Wu <[email protected]>
  • Loading branch information
xiki-tempula and xiki-tempula authored Jun 12, 2023
1 parent 4c3308c commit 1ff15fe
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Changes
- Use loguru instead of logging for log (issue #301, PR #303).

Enhancements
- "Statistical inefficiency" is logged at debug level for equilibrium_detection
and statistical_inefficiency (issue #295, PR#325).
- Add a parser to read serialised pandas dataframe (parquet) (issue #316, PR#317).
- workflow.ABFE allow parquet as input (issue #316, PR#317).
- Allow MBAR estimator to use bootstrap to compute error (issue #320, PR#322).
Expand Down
3 changes: 3 additions & 0 deletions docs/parsing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ a :class:`pandas.DataFrame`. For loading alchemlyb data we provide the
new_u_nk = extract_u_nk('u_nk.parquet', T=300)
new_dHdl = extract_dHdl('dHdl.parquet', T=300)

.. Note::
Serialization of a :class:`pandas.DataFrame` to a ``parquet`` file is only supported
for ``pandas>=2``, whereas deserialization is permitted for any pandas version.

.. _dHdl:

Expand Down
8 changes: 8 additions & 0 deletions src/alchemlyb/preprocessing/subsampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from pymbar.timeseries import detect_equilibration as _detect_equilibration
from pymbar.timeseries import statistical_inefficiency as _statistical_inefficiency
from pymbar.timeseries import subsample_correlated_data as _subsample_correlated_data
from loguru import logger

from .. import pass_attrs

Expand Down Expand Up @@ -516,12 +517,15 @@ def statistical_inefficiency(
df = slicing(df, lower=lower, upper=upper, step=step)

# calculate statistical inefficiency of series (could use fft=True but needs test)
logger.debug("Running statistical inefficiency analysis.")
statinef = _statistical_inefficiency(series)
logger.debug("Statistical inefficiency: {:.2f}.", statinef)

# use the subsample_correlated_data function to get the subsample index
indices = _subsample_correlated_data(
series, g=statinef, conservative=conservative
)
logger.debug("Number of uncorrelated samples: {}.", len(indices))
df = df.iloc[indices]
else:
df = slicing(df, lower=lower, upper=upper, step=step)
Expand Down Expand Up @@ -592,12 +596,16 @@ def equilibrium_detection(
df = slicing(df, lower=lower, upper=upper, step=step)

# calculate statistical inefficiency of series, with equilibrium detection
logger.debug("Running equilibration detection.")
t, statinef, Neff_max = _detect_equilibration(series.values)
logger.debug("Start index: {}.", t)
logger.debug("Statistical inefficiency: {:.2f}.", statinef)

series_equil = series[t:]
df_equil = df[t:]

indices = _subsample_correlated_data(series_equil, g=statinef)
logger.debug("Number of uncorrelated samples: {}.", len(indices))
df = df_equil.iloc[indices]
else:
df = slicing(df, lower=lower, upper=upper, step=step)
Expand Down
21 changes: 21 additions & 0 deletions src/alchemlyb/tests/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Tests for preprocessing functions.
"""
import logging

import numpy as np
import pytest
from numpy.testing import assert_allclose
Expand Down Expand Up @@ -523,3 +525,22 @@ def test_u_nk2series_deprecated(self, u_nk, methodargs, reference):
def test_other_method_ValueError(self, u_nk):
with pytest.raises(ValueError, match="Decorrelation method bogus not found."):
u_nk2series(u_nk, method="bogus")


class TestLogging:
    """Check the DEBUG-level log output produced by ``decorrelate_u_nk``."""

    def test_detect_equilibration(self, caplog, u_nk):
        """With ``remove_burnin=True`` all four expected messages are logged."""
        expected_messages = (
            "Running equilibration detection.",
            "Start index:",
            "Statistical inefficiency:",
            "Number of uncorrelated samples:",
        )

        with caplog.at_level(logging.DEBUG):
            decorrelate_u_nk(u_nk, remove_burnin=True)

        # Nothing is logged after the call, so one snapshot of the captured
        # text is enough for every membership check.
        captured = caplog.text
        for message in expected_messages:
            assert message in captured

    def test_statistical_inefficiency(self, caplog, u_nk):
        """With default arguments the three expected messages are logged."""
        expected_messages = (
            "Running statistical inefficiency analysis.",
            "Statistical inefficiency:",
            "Number of uncorrelated samples:",
        )

        with caplog.at_level(logging.DEBUG):
            decorrelate_u_nk(u_nk)

        captured = caplog.text
        for message in expected_messages:
            assert message in captured

0 comments on commit 1ff15fe

Please sign in to comment.