From e3aab90db6e261888ddf855ef3a408b81cbb57ac Mon Sep 17 00:00:00 2001
From: ricardoV94
Date: Wed, 6 Nov 2024 14:44:18 +0100
Subject: [PATCH] Implement Symmetric and Asymmetric Multivariate Laplace distributions

---
 docs/api_reference.rst                      |   2 +
 pymc_experimental/distributions/__init__.py |   3 +
 .../distributions/multivariate/laplace.py   | 276 ++++++++++++++++++
 .../multivariate/test_laplace.py            | 207 +++++++++++++
 4 files changed, 488 insertions(+)
 create mode 100644 pymc_experimental/distributions/multivariate/laplace.py
 create mode 100644 tests/distributions/multivariate/test_laplace.py

diff --git a/docs/api_reference.rst b/docs/api_reference.rst
index 18ddc24b..fda1ba1f 100644
--- a/docs/api_reference.rst
+++ b/docs/api_reference.rst
@@ -39,6 +39,8 @@ Distributions
     GeneralizedPoisson
     BetaNegativeBinomial
     GenExtreme
+    MvAsymmetricLaplace
+    MvLaplace
     R2D2M2CP
     Skellam
     histogram_approximation
diff --git a/pymc_experimental/distributions/__init__.py b/pymc_experimental/distributions/__init__.py
index f206c246..56b66cea 100644
--- a/pymc_experimental/distributions/__init__.py
+++ b/pymc_experimental/distributions/__init__.py
@@ -24,6 +24,7 @@
     Skellam,
 )
 from pymc_experimental.distributions.histogram_utils import histogram_approximation
+from pymc_experimental.distributions.multivariate.laplace import MvAsymmetricLaplace, MvLaplace
 from pymc_experimental.distributions.multivariate.r2d2m2cp import R2D2M2CP
 from pymc_experimental.distributions.timeseries import DiscreteMarkovChain

@@ -32,6 +33,8 @@
     "DiscreteMarkovChain",
     "GeneralizedPoisson",
     "GenExtreme",
+    "MvAsymmetricLaplace",
+    "MvLaplace",
     "R2D2M2CP",
     "Skellam",
     "histogram_approximation",
diff --git a/pymc_experimental/distributions/multivariate/laplace.py b/pymc_experimental/distributions/multivariate/laplace.py
new file mode 100644
index 00000000..30025b4e
--- /dev/null
+++ b/pymc_experimental/distributions/multivariate/laplace.py
@@ -0,0 +1,276 @@
+import numpy as np
+import pytensor.tensor as pt
+import scipy
+
+from pymc.distributions.dist_math import check_parameters
+from pymc.distributions.distribution import Continuous, SymbolicRandomVariable, _support_point
+from pymc.distributions.moments.means import _mean
+from pymc.distributions.multivariate import (
+    _logdet_from_cholesky,
+    nan_lower_cholesky,
+    quaddist_chol,
+    quaddist_matrix,
+    solve_lower,
+)
+from pymc.distributions.shape_utils import implicit_size_from_params, rv_size_is_none
+from pymc.logprob.basic import _logprob
+from pymc.pytensorf import normalize_rng_param
+from pytensor.gradient import grad_not_implemented
+from pytensor.scalar import BinaryScalarOp, upgrade_to_float
+from pytensor.tensor.elemwise import Elemwise
+from pytensor.tensor.random.utils import normalize_size_param
+
+
+class Kv(BinaryScalarOp):
+    """
+    Modified Bessel function of the second kind of real order v.
+ """ + + nfunc_spec = ("scipy.special.kv", 2, 1) + + @staticmethod + def st_impl(v, x): + return scipy.special.kv(v, x) + + def impl(self, v, x): + return self.st_impl(v, x) + + def L_op(self, inputs, outputs, output_grads): + v, x = inputs + [out] = outputs + [g_out] = output_grads + dx = -(v / x) * out - self.kv(v - 1, x) + return [grad_not_implemented(self, 0, v), g_out * dx] + + def c_code(self, *args, **kwargs): + raise NotImplementedError() + + +kv = Elemwise(Kv(upgrade_to_float, name="kv")) + + +class MvLaplaceRV(SymbolicRandomVariable): + name = "multivariate_laplace" + extended_signature = "[rng],[size],(m),(m,m)->[rng],(m)" + _print_name = ("MultivariateLaplace", "\\operatorname{MultivariateLaplace}") + + @classmethod + def rv_op(cls, mu, cov, *, size=None, rng=None): + mu = pt.as_tensor(mu) + cov = pt.as_tensor(cov) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + + assert mu.type.ndim >= 1 + assert cov.type.ndim >= 2 + + if rv_size_is_none(size): + size = implicit_size_from_params(mu, cov, ndims_params=(1, 2)) + + next_rng, e = pt.random.exponential(size=size, rng=rng).owner.outputs + next_rng, z = pt.random.multivariate_normal( + mean=pt.zeros(mu.shape[-1]), cov=cov, size=size, rng=next_rng + ).owner.outputs + rv = mu + pt.sqrt(e)[..., None] * z + + return cls( + inputs=[rng, size, mu, cov], + outputs=[next_rng, rv], + )(rng, size, mu, cov) + + +class MvLaplace(Continuous): + r"""Multivariate (Symmetric) Laplace distribution. + + The pdf of this distribution is + + .. math:: + + pdf(x \mid \mu, \Sigma) = + \frac{2}{(2\pi)^{k/2} |\Sigma|^{1/2}} + ( \frac{(x-\mu)'\Sigma^{-1}(x-mu)}{2} )^{v/2} + \K_v (\sqrt{2(x-\mu)' \Sigma^{-1} (x - \mu)}}) + + where :math:`v = 1 - k/2` and :math:`\K_v` is the modified Bessel function of the second kind. + + ======== ========================== + Support :math:`x \in \mathbb{R}^k` + Mean :math:`\mu` + Variance :math:`\Sigma` + ======== ========================== + + Parameters + ---------- + mu : tensor_like of float + Location. + cov : tensor_like of float, optional + Covariance matrix. Exactly one of cov, tau, or chol is needed. + tau : tensor_like of float, optional + Precision matrix. Exactly one of cov, tau, or chol is needed. + chol : tensor_like of float, optional + Cholesky decomposition of covariance matrix. Exactly one of cov, + tau, or chol is needed. + lower: bool, default=True + Whether chol is the lower tridiagonal cholesky factor. 
+ """ + + rv_type = MvLaplaceRV + rv_op = MvLaplaceRV.rv_op + + @classmethod + def dist(cls, mu=0, cov=None, *, tau=None, chol=None, lower=True, **kwargs): + cov = quaddist_matrix(cov, chol, tau, lower) + + mu = pt.atleast_1d(pt.as_tensor_variable(mu)) + if mu.type.broadcastable[-1] and not cov.type.broadcastable[-1]: + mu, _ = pt.broadcast_arrays(mu, cov[..., -1]) + return super().dist([mu, cov], **kwargs) + + +class MvAsymmetricLaplaceRV(SymbolicRandomVariable): + name = "multivariate_asymmetric_laplace" + extended_signature = "[rng],[size],(m),(m,m)->[rng],(m)" + _print_name = ("MultivariateAsymmetricLaplace", "\\operatorname{MultivariateAsymmetricLaplace}") + + @classmethod + def rv_op(cls, mu, cov, *, size=None, rng=None): + mu = pt.as_tensor(mu) + cov = pt.as_tensor(cov) + rng = normalize_rng_param(rng) + size = normalize_size_param(size) + + assert mu.type.ndim >= 1 + assert cov.type.ndim >= 2 + + if rv_size_is_none(size): + size = implicit_size_from_params(mu, cov, ndims_params=(1, 2)) + + next_rng, e = pt.random.exponential(size=size, rng=rng).owner.outputs + next_rng, z = pt.random.multivariate_normal( + mean=pt.zeros(mu.shape[-1]), cov=cov, size=size, rng=next_rng + ).owner.outputs + e = e[..., None] + rv = e * mu + pt.sqrt(e) * z + + return cls( + inputs=[rng, size, mu, cov], + outputs=[next_rng, rv], + )(rng, size, mu, cov) + + +class MvAsymmetricLaplace(Continuous): + r"""Multivariate Asymmetric Laplace distribution. + + The pdf of this distribution is + + .. math:: + + pdf(x \mid \mu, \Sigma) = + \frac{2}{(2\pi)^{k/2} |\Sigma|^{1/2}} + ( \frac{(x-\mu)'\Sigma^{-1}(x-mu)}{2} )^{v/2} + \K_v (\sqrt{2(x-\mu)' \Sigma^{-1} (x - \mu)}}) + + where :math:`v = 1 - k/2` and :math:`\K_v` is the modified Bessel function of the second kind. + + ======== ========================== + Support :math:`x \in \mathbb{R}^k` + Mean :math:`\mu` + Variance :math:`\Sigma + \mu' \mu` + ======== ========================== + + Parameters + ---------- + mu : tensor_like of float + Location. + cov : tensor_like of float, optional + Covariance matrix. Exactly one of cov, tau, or chol is needed. + tau : tensor_like of float, optional + Precision matrix. Exactly one of cov, tau, or chol is needed. + chol : tensor_like of float, optional + Cholesky decomposition of covariance matrix. Exactly one of cov, + tau, or chol is needed. + lower: bool, default=True + Whether chol is the lower tridiagonal cholesky factor. 
+ """ + + rv_type = MvAsymmetricLaplaceRV + rv_op = MvAsymmetricLaplaceRV.rv_op + + @classmethod + def dist(cls, mu=0, cov=None, *, tau=None, chol=None, lower=True, **kwargs): + cov = quaddist_matrix(cov, chol, tau, lower) + + mu = pt.atleast_1d(pt.as_tensor_variable(mu)) + if mu.type.broadcastable[-1] and not cov.type.broadcastable[-1]: + mu, _ = pt.broadcast_arrays(mu, cov[..., -1]) + return super().dist([mu, cov], **kwargs) + + +@_logprob.register(MvLaplaceRV) +def mv_laplace_logp(op, values, rng, size, mu, cov, **kwargs): + [value] = values + quaddist, logdet, posdef = quaddist_chol(value, mu, cov) + + k = value.shape[-1].astype("floatX") + norm = np.log(2) - (k / 2) * np.log(2 * np.pi) - logdet + + v = 1 - (k / 2) + kernel = ((v / 2) * pt.log(quaddist / 2)) + pt.log(kv(v, pt.sqrt(2 * quaddist))) + + logp_val = norm + kernel + return check_parameters(logp_val, posdef, msg="posdef scale") + + +@_logprob.register(MvAsymmetricLaplaceRV) +def mv_asymmetric_laplace_logp(op, values, rng, size, mu, cov, **kwargs): + [value] = values + + chol_cov = nan_lower_cholesky(cov) + logdet, posdef = _logdet_from_cholesky(chol_cov) + + # solve_triangular will raise if there are nans + # (which happens if the cholesky fails) + chol_cov = pt.switch(posdef[..., None, None], chol_cov, 1) + + solve_x = solve_lower(chol_cov, value, b_ndim=1) + solve_mu = solve_lower(chol_cov, mu, b_ndim=1) + + x_quaddist = (solve_x**2).sum(-1) + mu_quaddist = (solve_mu**2).sum(-1) + x_mu_quaddist = (value * solve_mu).sum(-1) + + k = value.shape[-1].astype("floatX") + norm = np.log(2) - (k / 2) * np.log(2 * np.pi) - logdet + + v = 1 - (k / 2) + kernel = ( + x_mu_quaddist + + ((v / 2) * (pt.log(x_quaddist) - pt.log(2 + mu_quaddist))) + + pt.log(kv(v, pt.sqrt((2 + mu_quaddist) * x_quaddist))) + ) + + logp_val = norm + kernel + return check_parameters(logp_val, posdef, msg="posdef scale") + + +@_mean.register(MvLaplaceRV) +@_mean.register(MvAsymmetricLaplaceRV) +def mv_laplace_mean(op, rv, rng, size, mu, cov): + if rv_size_is_none(size): + bcast_mu, _ = pt.random.utils.broadcast_params([mu, cov], ndims_params=[1, 2]) + else: + bcast_mu = pt.broadcast_to(mu, pt.concatenate([size, [mu.shape[-1]]])) + return bcast_mu + + +@_support_point.register(MvLaplaceRV) +@_support_point.register(MvAsymmetricLaplaceRV) +def mv_laplace_support_point(op, rv, rng, size, mu, cov): + # We have a 0 * inf when value = mu. I assume density is infinite, which isn't a good starting point. 
+    point = mu + 1
+    if rv_size_is_none(size):
+        bcast_point, _ = pt.random.utils.broadcast_params([point, cov], ndims_params=[1, 2])
+    else:
+        bcast_shape = pt.concatenate([size, [point.shape[-1]]])
+        bcast_point = pt.broadcast_to(point, bcast_shape)
+    return bcast_point
diff --git a/tests/distributions/multivariate/test_laplace.py b/tests/distributions/multivariate/test_laplace.py
new file mode 100644
index 00000000..be9a4ee2
--- /dev/null
+++ b/tests/distributions/multivariate/test_laplace.py
@@ -0,0 +1,207 @@
+import numpy as np
+import pymc as pm
+import pytensor.tensor as pt
+import pytest
+import scipy
+
+from pymc.testing import assert_support_point_is_expected
+from pytensor.graph.basic import equal_computations
+
+from pymc_experimental.distributions.multivariate.laplace import MvAsymmetricLaplace, MvLaplace
+
+
+@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace])
+def test_params(dist):
+    mu = pt.vector("mu")
+    cov = pt.matrix("cov")
+    chol = pt.matrix("chol")
+    tau = pt.matrix("tau")
+
+    rv = dist.dist(mu=mu, cov=cov)
+    mu_param, cov_param = rv.owner.op.dist_params(rv.owner)
+    assert mu_param is mu
+    assert cov_param is cov
+
+    # Default mu from shape of chol
+    rv = dist.dist(chol=chol)
+    mu_param, cov_param = rv.owner.op.dist_params(rv.owner)
+    cov_expected = chol @ chol.T
+    mu_expected, _ = pt.broadcast_arrays(0, cov_expected[..., -1])
+    assert equal_computations([mu_param, cov_param], [mu_expected, cov_expected])
+
+    # Broadcast mu to chol (upper triangular)
+    rv = dist.dist(mu=mu[0], chol=chol, lower=False)
+    mu_param, cov_param = rv.owner.op.dist_params(rv.owner)
+    # It's a bit silly but we transpose twice when lower=False
+    cov_expected = chol.T @ chol.T.T
+    mu_expected, _ = pt.broadcast_arrays(mu[0], cov_expected[..., -1])
+    assert equal_computations([mu_param, cov_param], [mu_expected, cov_expected])
+
+    # Mu and tau
+    rv = dist.dist(mu=mu, tau=tau)
+    mu_param, cov_param = rv.owner.op.dist_params(rv.owner)
+    assert equal_computations([mu_param, cov_param], [mu, pt.linalg.inv(tau)])
+
+
+@pytest.mark.parametrize(
+    "mu, cov, size, expected",
+    [
+        (np.arange(6).reshape(3, 2), np.eye(2), None, np.arange(1, 7).reshape(3, 2)),
+        (
+            np.arange(2),
+            np.broadcast_to(np.eye(2), (3, 2, 2)),
+            None,
+            np.broadcast_to(np.arange(1, 3), (3, 2)),
+        ),
+        (0, np.eye(3), (4,), np.ones((4, 3))),
+    ],
+)
+@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace])
+def test_support_point(dist, mu, cov, size, expected):
+    with pm.Model() as model:
+        dist("x", mu=mu, cov=cov, size=size)
+    assert_support_point_is_expected(model, expected)
+
+
+@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace])
+def test_mean(dist):
+    mu = [np.pi, np.e]
+
+    # Explicit size
+    rv = dist.dist(mu=mu, chol=np.eye(2), size=(3,))
+    mean = pm.distributions.moments.mean(rv)
+    np.testing.assert_allclose(mean.eval(mode="FAST_COMPILE"), np.broadcast_to(mu, (3, 2)))
+
+    # Implicit size from cov
+    rv = dist.dist(mu=mu, cov=np.broadcast_to(np.eye(2), (4, 2, 2)))
+    mean = pm.distributions.moments.mean(rv)
+    np.testing.assert_allclose(mean.eval(mode="FAST_COMPILE"), np.broadcast_to(mu, (4, 2)))
+
+
+@pytest.mark.parametrize("dist", [MvLaplace, MvAsymmetricLaplace])
+def test_random(dist):
+    mu = [-1, np.pi, 1]
+    cov = [[1, 0.5, 0.25], [0.5, 2, 0.5], [0.25, 0.5, 3]]
+    rv = dist.dist(mu=mu, cov=cov, size=10_000)
+
+    samples = pm.draw(rv, random_seed=13)
+    assert samples.shape == (10_000, 3)
+    np.testing.assert_allclose(np.mean(samples, axis=0), mu, rtol=0.05)
+
+    expected_cov = cov if dist is MvLaplace else cov + np.outer(mu, mu)
+    np.testing.assert_allclose(np.cov(samples, rowvar=False), expected_cov, rtol=0.1)
+
+
+def test_symmetric_matches_univariate_logp():
+    # Test MvLaplace matches univariate Laplace when there's a single entry
+    mean = 1.0
+    scale = 2.0
+    # Variance of Laplace is 2 * scale ** 2
+    rv = MvLaplace.dist(mu=[mean], cov=[[2 * scale**2]])
+    ref_rv = pm.Laplace.dist(mu=mean, b=scale)
+
+    test_val = np.random.normal(size=(3, 1))
+    rv_logp = pm.logp(rv, test_val).eval()
+    ref_logp = pm.logp(ref_rv, test_val).squeeze(-1).eval()
+    np.testing.assert_allclose(rv_logp, ref_logp)
+
+
+@pytest.mark.xfail(reason="Not sure about equivalence. Test fails")
+def test_asymmetric_matches_univariate_logp():
+    # Test MvAsymmetricLaplace matches univariate AsymmetricLaplace when there's a single entry
+    k = 2.0
+    # From wikipedia: https://en.wikipedia.org/wiki/Asymmetric_Laplace_distribution
+    mean = (1 - k**2) / k
+    var = ((1 + k**4) / (k**2)) - mean**2
+    rv = MvAsymmetricLaplace.dist(mu=[mean], cov=[[var]])
+    ref_rv = pm.AsymmetricLaplace.dist(kappa=k, mu=0, b=1)
+
+    test_val = np.random.normal(size=(3, 1))
+    rv_logp = pm.logp(rv, test_val).eval()
+    ref_logp = pm.logp(ref_rv, test_val).squeeze(-1).eval()
+    np.testing.assert_allclose(rv_logp, ref_logp)
+
+
+def test_asymmetric_matches_symmetric_logp():
+    # Test it matches with the symmetric case when mu = 0
+    mu = np.zeros(2)
+    tau = np.linalg.inv(np.array([[1, -0.5], [-0.5, 2]]))
+    rv = MvAsymmetricLaplace.dist(mu=mu, tau=tau)
+    ref_rv = MvLaplace.dist(mu=mu, tau=tau)
+
+    rv_val = np.random.normal(size=(3, 2))
+    logp_eval = pm.logp(rv, rv_val).eval()
+    logp_expected = pm.logp(ref_rv, rv_val).eval()
+    np.testing.assert_allclose(logp_eval, logp_expected)
+
+
+def test_symmetric_logp():
+    # Testing against simple bivariate cases described in:
+    # https://en.wikipedia.org/wiki/Multivariate_Laplace_distribution#Probability_density_function
+
+    # Zero mean, non-identity covariance case
+    mu = np.zeros(2)
+    s1 = 0.5
+    s2 = 2.0
+    r = -0.25
+    cov = np.array(
+        [
+            [s1**2, r * s1 * s2],
+            [r * s1 * s2, s2**2],
+        ]
+    )
+    rv = MvLaplace.dist(mu=mu, cov=cov)
+    rv_val = np.random.normal(size=(2,))
+    logp_eval = pm.logp(rv, rv_val).eval()
+
+    x1, x2 = rv_val
+    logp_expected = np.log(
+        (1 / (np.pi * s1 * s2 * np.sqrt(1 - r**2)))
+        * scipy.special.kv(
+            0,
+            np.sqrt(
+                (2 * ((x1**2 / s1**2) - (2 * r * x1 * x2 / (s1 * s2)) + (x2**2 / s2**2)))
+                / (1 - r**2)
+            ),
+        )
+    )
+    np.testing.assert_allclose(
+        logp_eval,
+        logp_expected,
+    )
+
+    # Non-zero mean, identity covariance case
+    mu = np.array([1, 3])
+    rv = MvLaplace.dist(mu=mu, cov=np.eye(2))
+    rv_val = np.random.normal(size=(2,))
+    logp_eval = pm.logp(rv, rv_val).eval()
+
+    logp_expected = np.log(
+        1 / np.pi * scipy.special.kv(0, np.sqrt(2 * np.sum((rv_val - mu) ** 2)))
+    )
+    np.testing.assert_allclose(
+        logp_eval,
+        logp_expected,
+    )
+
+
+def test_asymmetric_logp():
+    # Testing against trivial univariate case
+    mu = 0.5
+    cov = np.array([[1.0]])
+    rv = MvAsymmetricLaplace.dist(mu=mu, cov=cov)
+    rv_val = np.random.normal(size=(1,))
+    logp_eval = pm.logp(rv, rv_val).eval()
+
+    [x] = rv_val
+    logp_expected = np.log(
+        ((2 * np.exp(x * mu)) / np.sqrt(2 * np.pi))
+        * np.power(x**2 / (2 + mu**2), 1 / 4)
+        * scipy.special.kv(
+            1 / 2,
+            np.sqrt((2 + mu**2) * x**2),
+        )
+    )
+    np.testing.assert_allclose(
+        logp_eval,
+        logp_expected,
+    )
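
Reviewer aside, not part of the patch: both rv_op implementations above are the same normal variance-mean mixture, combining an Exponential(1) draw e with a centered Gaussian z as mu + sqrt(e) * z (symmetric) or e * mu + sqrt(e) * z (asymmetric). A minimal NumPy sketch of that construction, checking the moments the docstrings state (variable names and tolerances are mine):

    import numpy as np

    rng = np.random.default_rng(13)
    mu = np.array([-1.0, np.pi, 1.0])
    cov = np.array([[1.0, 0.5, 0.25], [0.5, 2.0, 0.5], [0.25, 0.5, 3.0]])

    n = 500_000
    e = rng.exponential(size=(n, 1))                       # exponential mixing variable
    z = rng.multivariate_normal(np.zeros(3), cov, size=n)  # centered Gaussian

    sym = mu + np.sqrt(e) * z       # MvLaplace draw: mean mu, covariance Sigma
    asym = e * mu + np.sqrt(e) * z  # MvAsymmetricLaplace draw: mean mu, covariance Sigma + mu mu'

    assert np.allclose(sym.mean(0), mu, atol=0.05)
    assert np.allclose(np.cov(sym, rowvar=False), cov, rtol=0.1, atol=0.1)
    assert np.allclose(asym.mean(0), mu, atol=0.05)
    assert np.allclose(np.cov(asym, rowvar=False), cov + np.outer(mu, mu), rtol=0.1, atol=0.3)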
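
The single-entry equivalence exercised in test_symmetric_matches_univariate_logp follows from the half-integer Bessel identity K_{1/2}(u) = sqrt(pi / (2u)) * exp(-u): with k = 1 (so v = 1/2) and Sigma = 2 * b**2, the symmetric pdf collapses to the univariate Laplace density exp(-|x - mu| / b) / (2 * b). A short SciPy check of that reduction (illustrative sketch, not part of the patch):

    import numpy as np
    from scipy import special, stats

    b = 2.0            # univariate Laplace scale
    sigma2 = 2 * b**2  # matching MvLaplace variance for k = 1
    x, mu = 0.7, 1.0
    d2 = (x - mu) ** 2

    v = 1 / 2  # v = 1 - k/2 with k = 1
    pdf = (
        2 / np.sqrt(2 * np.pi * sigma2)
        * (d2 / (2 * sigma2)) ** (v / 2)
        * special.kv(v, np.sqrt(2 * d2 / sigma2))
    )
    assert np.isclose(pdf, stats.laplace(loc=mu, scale=b).pdf(x))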
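
Finally, a hypothetical usage sketch for the new API (the model, data, and priors are invented for illustration; only the MvLaplace distribution and its import path come from this patch):

    import numpy as np
    import pymc as pm

    from pymc_experimental.distributions import MvLaplace

    # Fake heavy-tailed 3-dimensional observations
    data = np.random.default_rng(0).standard_t(df=3, size=(100, 3))

    with pm.Model():
        mu = pm.Normal("mu", 0, 1, shape=3)
        chol, _, _ = pm.LKJCholeskyCov("chol", n=3, eta=2.0, sd_dist=pm.Exponential.dist(1.0))
        MvLaplace("obs", mu=mu, chol=chol, observed=data)
        idata = pm.sample()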