Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Log-normal log-likelihood #1378

Merged
merged 15 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion docs/source/log_likelihoods.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Overview:
- :class:`GaussianKnownSigmaLogLikelihood`
- :class:`GaussianLogLikelihood`
- :class:`KnownNoiseLogLikelihood`
- :class:`LogNormalLogLikelihood`
- :class:`MultiplicativeGaussianLogLikelihood`
- :class:`ScaledLogLikelihood`
- :class:`StudentTLogLikelihood`
Expand All @@ -46,11 +47,12 @@ Overview:

.. autoclass:: KnownNoiseLogLikelihood

.. autoclass:: LogNormalLogLikelihood

.. autoclass:: MultiplicativeGaussianLogLikelihood

.. autoclass:: ScaledLogLikelihood

.. autoclass:: StudentTLogLikelihood

.. autoclass:: UnknownNoiseLogLikelihood

1 change: 1 addition & 0 deletions pints/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def version(formatted=False):
GaussianKnownSigmaLogLikelihood,
GaussianLogLikelihood,
KnownNoiseLogLikelihood,
LogNormalLogLikelihood,
MultiplicativeGaussianLogLikelihood,
ScaledLogLikelihood,
StudentTLogLikelihood,
Expand Down
116 changes: 116 additions & 0 deletions pints/_log_likelihoods.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,122 @@ def __init__(self, problem, sigma):
super(KnownNoiseLogLikelihood, self).__init__(problem, sigma)


class LogNormalLogLikelihood(pints.ProblemLogLikelihood):
r"""
Calculates a log-likelihood assuming independent log-normal noise at each
time point, and adds a parameter representing the standard deviation
(sigma) of the noise on the log scale for each output.

Specifically, the sampling distribution takes the form:

.. math::
y(t) \sim \text{log-Normal}(\log f(\theta, t), \sigma)

which can alternatively be written:

.. math::
\log y(t) \sim \text{Normal}(\log f(\theta, t), \sigma)

Important to note is that:

.. math::
\mathbb{E}(y(t)) = f(\theta, t) \exp(\sigma^2 / 2)

As such, the optional parameter `mean_adjust` (if true) adjusts the
mean of the distribution so that its expectation is ``f(\theta, t)``. This
MichaelClerx marked this conversation as resolved.
Show resolved Hide resolved
shifted distribution is of the form:

.. math::
y(t) \sim \text{log-Normal}(\log f(\theta, t) - \sigma^2/2, \sigma)

Extends :class:`ProblemLogLikelihood`.

Parameters
----------
problem
A :class:`SingleOutputProblem` or :class:`MultiOutputProblem`. For a
single-output problem a single parameter is added, for a multi-output
problem ``n_outputs`` parameters are added.
mean_adjust
A Boolean. Adjusts the distribution so that its mean corresponds to the
function value. By default, this parameter if False.
"""

def __init__(self, problem, mean_adjust=False):
super(LogNormalLogLikelihood, self).__init__(problem)

# Get number of times, number of outputs
self._nt = len(self._times)
self._no = problem.n_outputs()

# Add parameters to problem
self._n_parameters = problem.n_parameters() + self._no

# Pre-calculate parts
self._logn = 0.5 * self._nt * np.log(2 * np.pi)

# For a log-normal sampling distribution any data points being below
# zero would mean that the log-likelihood is always -infinity
vals = np.asarray(self._values)
if np.any(vals <= 0):
raise ValueError('All data points must exceed zero.')
self._log_values = np.log(self._values)

self._mean_adjust = mean_adjust

def __call__(self, x):
sigma = np.asarray(x[-self._no:])
if any(sigma < 0):
return -np.inf

soln = self._problem.evaluate(x[:-self._no])
if np.any(soln < 0):
DavAug marked this conversation as resolved.
Show resolved Hide resolved
return -np.inf

error = np.log(self._values) - np.log(soln)
if self._mean_adjust:
error += 0.5 * sigma**2
return np.sum(- self._logn - self._nt * np.log(sigma)
- np.sum(self._log_values, axis=0)
- np.sum(error**2, axis=0) / (2 * sigma**2))

def evaluateS1(self, x):
""" See :meth:`LogPDF.evaluateS1()`. """
# Calculate log-likelihood and when log-prob is infinite, gradients are
# not defined
L = self.__call__(x)
if L == -np.inf:
return L, np.tile(np.nan, self._n_parameters)

sigma = np.asarray(x[-self._no:])

# Evaluate, and get residuals
y, dy = self._problem.evaluateS1(x[:-self._no])

# Reshape dy, in case we're working with a single-output problem
dy = dy.reshape(self._nt, self._no, self._n_parameters - self._no)

# Note: Must be (np.log(data) - simulation), sign now matters!
ben18785 marked this conversation as resolved.
Show resolved Hide resolved
error = np.log(self._values) - np.log(y)
if self._mean_adjust:
error += 0.5 * sigma**2

# Calculate derivatives in the model parameters
dL = np.sum(
(sigma**(-2.0) * np.sum((error.T * dy.T / y.T).T, axis=0).T).T,
axis=0)

# Calculate derivative wrt sigma
dsigma = -self._nt / sigma + sigma**(-3.0) * np.sum(error**2, axis=0)
if self._mean_adjust:
dsigma -= 1 / sigma * np.sum(error, axis=0)

dL = np.concatenate((dL, np.array(list(dsigma))))

# Return
return L, dL


class MultiplicativeGaussianLogLikelihood(pints.ProblemLogLikelihood):
r"""
Calculates the log-likelihood for a time-series model assuming a
Expand Down
144 changes: 144 additions & 0 deletions pints/tests/test_log_likelihoods.py
Original file line number Diff line number Diff line change
Expand Up @@ -1405,6 +1405,150 @@ def simulate(self, x, times):
self.assertAlmostEqual(log1(0) + log2(0), log3(0))


class TestLogNormalLogLikelihood(unittest.TestCase):

@classmethod
def setUpClass(cls):
# sreate test single output test model
cls.model_single = pints.toy.ConstantModel(1)
cls.model_multiple = pints.toy.ConstantModel(2)
cls.times = [1, 2, 3, 4]
cls.data_single = [3, 4, 5.5, 7.2]
cls.data_multiple = [[3, 1.1],
[4, 3.2],
[5.5, 4.5],
[7.2, 10.1]]
cls.problem_single = pints.SingleOutputProblem(
cls.model_single, cls.times, cls.data_single)
cls.problem_multiple = pints.MultiOutputProblem(
cls.model_multiple, cls.times, cls.data_multiple)
cls.log_likelihood = pints.LogNormalLogLikelihood(cls.problem_single)
cls.log_likelihood_adj = pints.LogNormalLogLikelihood(
cls.problem_single, mean_adjust=True)
cls.log_likelihood_multiple = pints.LogNormalLogLikelihood(
cls.problem_multiple)
cls.log_likelihood_multiple_adj = pints.LogNormalLogLikelihood(
cls.problem_multiple, mean_adjust=True)

def test_bad_constructor(self):
# tests that bad data types result in error
data = [0, 4, 5.5, 7.2]
problem = pints.SingleOutputProblem(
self.model_single, self.times, data)
self.assertRaises(ValueError, pints.LogNormalLogLikelihood,
problem)

def test_call(self):
# test calls of log-likelihood

# single output problem
sigma = 1
mu = 3.7
log_like = self.log_likelihood([mu, sigma])
self.assertAlmostEqual(log_like, -10.164703123713256)
DavAug marked this conversation as resolved.
Show resolved Hide resolved
log_like_adj = self.log_likelihood_adj([mu, sigma])
self.assertAlmostEqual(log_like_adj, -11.129905368437115)

sigma = -1
log_like = self.log_likelihood([mu, sigma])
self.assertEqual(log_like, -np.inf)
log_like = self.log_likelihood([-1, sigma])

mu = -1
sigma = 1
log_like = self.log_likelihood([-1, sigma])
self.assertEqual(log_like, -np.inf)

# two dim output problem
mu1 = 1.5
mu2 = 3.4 / 2
sigma1 = 3
sigma2 = 1.2
log_like = self.log_likelihood_multiple([mu1, mu2, sigma1, sigma2])
self.assertAlmostEqual(log_like, -24.906992140695426)

# adjusts mean
log_like = self.log_likelihood_multiple_adj([mu1, mu2, sigma1, sigma2])
self.assertAlmostEqual(log_like, -32.48791585037583)

sigma1 = -1
log_like = self.log_likelihood_multiple([mu1, mu2, sigma1, sigma2])
self.assertEqual(log_like, -np.inf)

def test_evaluateS1(self):
# tests sensitivity

# single output problem
sigma = 1
mu = 3.7
y, dL = self.log_likelihood.evaluateS1([mu, sigma])
self.assertEqual(len(dL), 2)
y_call = self.log_likelihood([mu, sigma])
self.assertEqual(y, y_call)
correct_vals = [0.2514606728237081, -3.3495735543077423]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

# mean-adjustment
y, dL = self.log_likelihood_adj.evaluateS1([mu, sigma])
self.assertEqual(len(dL), 2)
y_call = self.log_likelihood_adj([mu, sigma])
self.assertEqual(y, y_call)
correct_vals = [0.7920012133642484, -4.349573554307744]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

sigma = -1
y, dL = self.log_likelihood.evaluateS1([mu, sigma])
self.assertEqual(y, -np.inf)
for dl in dL:
self.assertTrue(np.isnan(dL[i]))

mu = -1
sigma = 1
y, dL = self.log_likelihood.evaluateS1([mu, sigma])
self.assertEqual(y, -np.inf)
for dl in dL:
self.assertTrue(np.isnan(dL[i]))

# two dim output problem
mu1 = 1.5
mu2 = 3.4 / 2
sigma1 = 3
sigma2 = 1.2
y, dL = self.log_likelihood_multiple.evaluateS1(
[mu1, mu2, sigma1, sigma2])
self.assertEqual(len(dL), 4)
y_call = self.log_likelihood_multiple([mu1, mu2, sigma1, sigma2])
self.assertEqual(y, y_call)
# note that 2x needed for second output due to df / dtheta for
# constant model
correct_vals = [0.33643521004561316, 0.03675900403289047 * 2,
-1.1262529182124121, -1.8628028462558714]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

# mean-adjustment
y, dL = self.log_likelihood_multiple_adj.evaluateS1(
[mu1, mu2, sigma1, sigma2])
self.assertEqual(len(dL), 4)
y_call = self.log_likelihood_multiple_adj([mu1, mu2, sigma1, sigma2])
self.assertEqual(y, y_call)
# note that 2x needed for second output due to df / dtheta for
# constant model
correct_vals = [1.6697685433789466, 0.6249942981505375 * 2,
-4.126252918212412, -3.062802846255874]
for i in range(len(dL)):
self.assertAlmostEqual(dL[i], correct_vals[i])

sigma2 = -2
y, dL = self.log_likelihood_multiple.evaluateS1(
[mu1, mu2, sigma1, sigma2])
self.assertEqual(y, -np.inf)
for dl in dL:
self.assertTrue(np.isnan(dL[i]))


class TestMultiplicativeGaussianLogLikelihood(unittest.TestCase):

@classmethod
Expand Down