add TV operator, merge ProximalGradient and AcceleratedProximalGradient, add GeneralizedProximalGradient #86

Merged · 9 commits · Jun 13, 2022
2 changes: 1 addition & 1 deletion docs/source/adding.rst
@@ -156,7 +156,7 @@ Once the operator has been created, we can add it to the documentation of PyProximal
the operator within the ``index.rst`` file in ``docs/source/api`` directory.

Moreover, in order to facilitate the use of your operator by other users, a simple example should be provided as part of the
Sphinx-gallery of the documentation of the PyProximal library. The directory ``examples`` containes several scripts that
Sphinx-gallery of the documentation of the PyProximal library. The directory ``examples`` contains several scripts that
can be used as a template.


3 changes: 3 additions & 0 deletions docs/source/api/index.rst
@@ -75,6 +75,7 @@ Convex
Orthogonal
Quadratic
Simplex
TV


Non-Convex
@@ -140,6 +141,8 @@ Primal
HQS
LinearizedADMM
ProximalGradient
AcceleratedProximalGradient
GeneralizedProximalGradient
ProximalPoint
TwIST

18 changes: 18 additions & 0 deletions examples/plot_norms.py
@@ -123,4 +123,22 @@
plt.xlabel('x')
plt.title(r'$||x||_1$')
plt.legend()
plt.tight_layout()

###############################################################################
# We now consider the TV norm.
TV = pyproximal.TV(dim=1, sigma=1.)

x = np.arange(-1, 1, 0.1)
print('||x||_{TV}: ', TV(x))

tau = 0.5
xp = TV.prox(x, tau)

plt.figure(figsize=(7, 2))
plt.plot(x, x, 'k', lw=2, label='x')
plt.plot(x, xp, 'r', lw=2, label='prox(x)')
plt.xlabel('x')
plt.title(r'$||x||_{TV}$')
plt.legend()
plt.tight_layout()
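
For a quick check of the new operator outside the gallery, here is a minimal standalone sketch (it assumes only the ``pyproximal.TV(dim=1, sigma=...)`` call shown above and the existing ``pyproximal.L1`` operator): the TV prox should return a piecewise-constant signal, unlike the element-wise soft thresholding of the L1 prox.

import numpy as np
import pyproximal

# noisy step signal: the TV prox should flatten the two plateaus
np.random.seed(0)
x = np.concatenate([np.zeros(50), np.ones(50)]) + 0.1 * np.random.randn(100)

tv = pyproximal.TV(dim=1, sigma=1.)
l1 = pyproximal.L1(sigma=1.)

xtv = tv.prox(x, 0.5)  # piecewise-constant output expected
xl1 = l1.prox(x, 0.5)  # element-wise soft thresholding
print('TV of outputs:', np.abs(np.diff(xtv)).sum(), np.abs(np.diff(xl1)).sum())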
177 changes: 116 additions & 61 deletions pyproximal/optimization/primal.py
@@ -1,4 +1,5 @@
import time
import warnings
import numpy as np

from math import sqrt
@@ -101,11 +102,12 @@ def ProximalPoint(prox, x0, tau, niter=10, callback=None, show=False):

def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
epsg=1., niter=10, niterback=100,
acceleration=None,
callback=None, show=False):
r"""Proximal gradient
r"""Proximal gradient (optionnally accelerated)

Solves the following minimization problem using Proximal gradient
algorithm:
Solves the following minimization problem using the (Accelerated) Proximal
gradient algorithm:

.. math::

@@ -130,14 +132,16 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
backtracking is used to adaptively estimate the best tau at each
iteration. Finally note that :math:`\tau` can be chosen to be a vector
when dealing with problems with multiple right-hand-sides
beta : obj:`float`, optional
beta : :obj:`float`, optional
Backtracking parameter (must be between 0 and 1)
epsg : :obj:`float` or :obj:`np.ndarray`, optional
Scaling factor of g function
niter : :obj:`int`, optional
Number of iterations of iterative scheme
niterback : :obj:`int`, optional
Max number of iterations of backtracking
acceleration : :obj:`str`, optional
Acceleration (``None``, ``vandenberghe`` or ``fista``)
callback : :obj:`callable`, optional
Function with signature (``callback(x)``) to call after each iteration
where ``x`` is the current model vector
@@ -151,12 +155,15 @@

Notes
-----
The Proximal point algorithm can be expressed by the following recursion:
The (Accelerated) Proximal gradient algorithm can be expressed by the
following recursion:

.. math::

\mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{x}^k -
\tau^k \nabla f(\mathbf{x}^k))
\mathbf{y}^{k+1} = \mathbf{x}^k + \omega^k
(\mathbf{x}^k - \mathbf{x}^{k-1}) \\
\mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{y}^{k+1} -
\tau^k \nabla f(\mathbf{y}^{k+1}))

where at each iteration :math:`\tau^k` can be estimated by back-tracking
as follows:
@@ -173,7 +180,17 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,

where :math:`\tilde{f}_\tau(\mathbf{x}, \mathbf{y}) = f(\mathbf{y}) +
\nabla f(\mathbf{y})^T (\mathbf{x} - \mathbf{y}) +
1/(2\tau)||\mathbf{x} - \mathbf{y}||_2^2`.
1/(2\tau)||\mathbf{x} - \mathbf{y}||_2^2`,
and
:math:`\omega^k = 0` for ``acceleration=None``,
:math:`\omega^k = k / (k + 3)` for ``acceleration=vandenberghe`` [1]_
or :math:`\omega^k = (t_{k-1}-1)/t_k` for ``acceleration=fista`` where
:math:`t_k = (1 + \sqrt{1+4t_{k-1}^{2}}) / 2` [2]_.

.. [1] Vandenberghe, L., "Fast proximal gradient methods", 2010.
.. [2] Beck, A., and Teboulle, M. "A Fast Iterative Shrinkage-Thresholding
Algorithm for Linear Inverse Problems", SIAM Journal on
Imaging Sciences, vol. 2, pp. 183-202. 2009.

"""
# check if epgs is a vector
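
As a side note for reviewers, the momentum schedules introduced above differ only in how :math:`\omega^k` is generated; a dependency-free sketch of the two sequences (the function name is illustrative, not part of the PR):

import numpy as np

def omegas(niter, acceleration=None):
    # momentum weights omega^k for the schedules in the docstring
    t, ws = 1., []
    for k in range(niter):
        if acceleration == 'vandenberghe':
            ws.append(k / (k + 3))
        elif acceleration == 'fista':
            told = t
            t = (1. + np.sqrt(1. + 4. * t ** 2)) / 2.
            ws.append((told - 1.) / t)
        else:
            ws.append(0.)
    return ws

print(omegas(4, 'vandenberghe'))  # [0.0, 0.25, 0.4, 0.5]
print(omegas(4, 'fista'))         # [0.0, 0.2817..., 0.4340..., 0.5310...]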
@@ -182,9 +199,12 @@
else:
epsg_print = 'Multi'

if acceleration not in [None, 'None', 'vandenberghe', 'fista']:
raise NotImplementedError('Acceleration should be None, vandenberghe '
'or fista')
if show:
tstart = time.time()
print('Proximal Gradient\n'
print('Accelerated Proximal Gradient\n'
'---------------------------------------------------------\n'
'Proximal operator (f): %s\n'
'Proximal operator (g): %s\n'
@@ -201,13 +221,32 @@
backtracking = True
tau = 1.

# initialize model
t = 1.
x = x0.copy()
y = x.copy()

# iterate
for iiter in range(niter):
xold = x.copy()

# proximal step
if not backtracking:
x = proxg.prox(x - tau * proxf.grad(x), epsg * tau)
x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
else:
x, tau = _backtracking(x, tau, proxf, proxg, epsg,
x, tau = _backtracking(y, tau, proxf, proxg, epsg,
beta=beta, niterback=niterback)

# update y
if acceleration == 'vandenberghe':
omega = iiter / (iiter + 3)
elif acceleration == 'fista':
told = t
t = (1. + np.sqrt(1. + 4. * t ** 2)) / 2.
omega = ((told - 1.) / t)
else:
omega = 0
y = x + omega * (x - xold)

# run callback
if callback is not None:
@@ -233,40 +272,57 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
callback=None, show=False):
r"""Accelerated Proximal gradient

Solves the following minimization problem using Accelerated Proximal
This is a thin wrapper around :func:`pyproximal.optimization.primal.ProximalGradient` with
``vandenberghe`` or ``fista`` acceleration. See :func:`pyproximal.optimization.primal.ProximalGradient`
for details.

"""
warnings.warn('AcceleratedProximalGradient has been integrated directly into ProximalGradient '
'from v0.5.0. Switch to ProximalGradient and select the appropriate acceleration '
'parameter: this behaviour will become the default in v1.0.0, when '
'AcceleratedProximalGradient will be removed.', FutureWarning)
return ProximalGradient(proxf, proxg, x0, tau=tau, beta=beta,
epsg=epsg, niter=niter, niterback=niterback,
acceleration=acceleration,
callback=callback, show=show)
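
Downstream code can migrate as follows (a hypothetical call, shown only to illustrate the new ``acceleration`` parameter; ``proxf``, ``proxg`` and ``x0`` stand for whatever objects the caller already has):

# before (now emits a FutureWarning):
# xinv = AcceleratedProximalGradient(proxf, proxg, x0, tau=0.1, niter=200)

# after: same solver, acceleration selected explicitly
xinv = ProximalGradient(proxf, proxg, x0, tau=0.1, niter=200,
                        acceleration='fista')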


def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,
epsg=1., niter=10,
acceleration=None,
callback=None, show=False):
r"""Generalized Proximal gradient

Solves the following minimization problem using Generalized Proximal
gradient algorithm:

.. math::

\mathbf{x} = \argmin_\mathbf{x} f(\mathbf{x}) + \epsilon g(\mathbf{x})
\mathbf{x} = \argmin_\mathbf{x} \sum_{i=1}^n f_i(\mathbf{x})
+ \sum_{j=1}^m \epsilon_j g_j(\mathbf{x}),~~n,m \in \mathbb{N}^+

where :math:`f(\mathbf{x})` is a smooth convex function with a uniquely
defined gradient and :math:`g(\mathbf{x})` is any convex function that
has a known proximal operator.
where the :math:`f_i(\mathbf{x})` are smooth convex functions with a uniquely
defined gradient and the :math:`g_j(\mathbf{x})` are convex functions with
known proximal operators.

Parameters
----------
proxf : :obj:`pyproximal.ProxOperator`
Proximal operator of f function (must have ``grad`` implemented)
proxg : :obj:`pyproximal.ProxOperator`
Proximal operator of g function
proxfs : :obj:`list` of :obj:`pyproximal.ProxOperator`
Proximal operators of the :math:`f_i` functions (must have ``grad`` implemented)
proxgs : :obj:`list` of :obj:`pyproximal.ProxOperator`
Proximal operators of the :math:`g_j` functions
x0 : :obj:`numpy.ndarray`
Initial vector
tau : :obj:`float` or :obj:`numpy.ndarray`, optional
Positive scalar weight, which should satisfy the following condition
to guarantee convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
the Lipschitz constant of :math:`\nabla f`. When ``tau=None``,
the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`. When ``tau=None``,
backtracking is used to adaptively estimate the best tau at each
iteration. Finally note that :math:`\tau` can be chosen to be a vector
when dealing with problems with multiple right-hand-sides
beta : obj:`float`, optional
Backtracking parameter (must be between 0 and 1)
iteration.
epsg : :obj:`float` or :obj:`np.ndarray`, optional
Scaling factor of g function
niter : :obj:`int`, optional
Number of iterations of iterative scheme
niterback : :obj:`int`, optional
Max number of iterations of backtracking
acceleration : :obj:`str`, optional
Acceleration (``None``, ``vandenberghe`` or ``fista``)
callback : :obj:`callable`, optional
@@ -282,76 +338,75 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,

Notes
-----
The Accelerated Proximal point algorithm can be expressed by the
The Generalized Proximal gradient algorithm can be expressed by the
following recursion:

.. math::

\mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{y}^{k+1} -
\tau^k \nabla f(\mathbf{y}^{k+1})) \\
\mathbf{y}^{k+1} = \mathbf{x}^k + \omega^k
(\mathbf{x}^k - \mathbf{x}^{k-1})

where :math:`\omega^k = k / (k + 3)` for ``acceleration=vandenberghe`` [1]_
or :math:`\omega^k = (t_{k-1}-1)/t_k` for ``acceleration=fista`` where
:math:`t_k = (1 + \sqrt{1+4t_{k-1}^{2}}) / 2` [2]_

.. [1] Vandenberghe, L., "Fast proximal gradient methods", 2010.
.. [2] Beck, A., and Teboulle, M. "A Fast Iterative Shrinkage-Thresholding
Algorithm for Linear Inverse Problems", SIAM Journal on
Imaging Sciences, vol. 2, pp. 183-202. 2009.

\text{for } j=1,\cdots,m: \\
~~~~\mathbf{z}_j^{k+1} = \mathbf{z}_j^{k} + \eta_k \left(\prox_{\frac{\tau^k}{\omega_j} g_j}\left(2 \mathbf{x}^{k} - \mathbf{z}_j^{k}
- \tau^k \sum_{i=1}^n \nabla f_i(\mathbf{x}^{k})\right) - \mathbf{x}^{k}\right) \\
\mathbf{x}^{k+1} = \sum_{j=1}^m \omega_j \mathbf{z}_j^{k+1}

where :math:`\sum_{j=1}^m \omega_j=1`.
"""
# check if epgs is a ve
# check if epgs is a vector
if np.asarray(epsg).size == 1.:
epsg_print = str(epsg)
else:
epsg_print = 'Multi'

if acceleration not in ['vandenberghe', 'fista']:
raise NotImplementedError('Acceleration should be vandenberghe '
if acceleration not in [None, 'None', 'vandenberghe', 'fista']:
raise NotImplementedError('Acceleration should be None, vandenberghe '
'or fista')
if show:
tstart = time.time()
print('Accelerated Proximal Gradient\n'
print('Generalized Proximal Gradient\n'
'---------------------------------------------------------\n'
'Proximal operator (f): %s\n'
'Proximal operator (g): %s\n'
'tau = %10e\tepsg = %s\tniter = %d\n' % (type(proxf),
type(proxg),
0 if tau is None else tau,
'Proximal operators (f): %s\n'
'Proximal operators (g): %s\n'
'tau = %10e\nepsg = %s\tniter = %d\n' % ([type(proxf) for proxf in proxfs],
[type(proxg) for proxg in proxgs],
0 if tau is None else tau,
epsg_print, niter))
head = ' Itn x[0] f g J=f+eps*g'
print(head)

backtracking = False
if tau is None:
backtracking = True
tau = 1.

# initialize model
t = 1.
x = x0.copy()
y = x.copy()
zs = [x.copy() for _ in range(len(proxgs))]

# iterate
for iiter in range(niter):
xold = x.copy()

# proximal step
if not backtracking:
x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
else:
x, tau = _backtracking(y, tau, proxf, proxg, epsg,
beta=beta, niterback=niterback)
grad = np.zeros_like(x)
for i, proxf in enumerate(proxfs):
grad += proxf.grad(x)

sol = np.zeros_like(x)
for i, proxg in enumerate(proxgs):
tmp = 2 * y - zs[i] - tau * grad
tmp[:] = proxg.prox(tmp, tau * len(proxgs))
zs[i] += epsg * (tmp - y)
sol += zs[i] / len(proxgs)
x[:] = sol.copy()

# update y
if acceleration == 'vandenberghe':
omega = iiter / (iiter + 3)
else:
elif acceleration == 'fista':
told = t
t = (1. + np.sqrt(1. + 4. * t ** 2)) / 2.
omega = ((told - 1.) / t)
else:
omega = 0

y = x + omega * (x - xold)

# run callback
@@ -360,7 +415,7 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,

if show:
if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
pf, pg = proxf(x), proxg(x)
pf, pg = np.sum([proxf(x) for proxf in proxfs]), np.sum([proxg(x) for proxg in proxgs])
msg = '%6g %12.5e %10.3e %10.3e %10.3e' % \
(iiter + 1, x[0] if x.ndim == 1 else x[0, 0],
pf, pg[0] if epsg_print == 'Multi' else pg,
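A minimal end-to-end usage sketch for the new solver (data and operator choices are illustrative; ``L2``, ``L1`` and the new ``TV`` are PyProximal operators, and ``pylops.MatrixMult`` is assumed available):

import numpy as np
import pylops
import pyproximal
from pyproximal.optimization.primal import GeneralizedProximalGradient

np.random.seed(0)
n = 100
A = pylops.MatrixMult(np.random.randn(n, n) / np.sqrt(n))
xtrue = np.zeros(n)
xtrue[::10] = 1.
y = A @ xtrue

f = pyproximal.L2(Op=A, b=y)          # smooth term, provides .grad
g1 = pyproximal.L1(sigma=0.1)         # first proximable term
g2 = pyproximal.TV(dim=1, sigma=0.1)  # second proximable term (added in this PR)

xinv = GeneralizedProximalGradient([f], [g1, g2], x0=np.zeros(n),
                                   tau=0.9, niter=200, acceleration='fista')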
2 changes: 1 addition & 1 deletion pyproximal/proximal/L21.py
@@ -11,7 +11,7 @@ class L21(ProxOperator):

Parameters
----------
ndims : :obj:`int`
ndim : :obj:`int`
Number of dimensions :math:`N_{dim}`. Used to reshape the input array
in a matrix of size :math:`N_{dim} \times N'_{x}` where
:math:`N'_x = \frac{N_x}{N_{dim}}`. Note that the input
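The renamed parameter can be exercised with a quick sketch (hypothetical sizes; ``L21`` reshapes the input into an :math:`N_{dim} \times N'_x` matrix and soft-thresholds column-wise):

import numpy as np
import pyproximal

l21 = pyproximal.L21(ndim=2, sigma=1.)
x = np.random.randn(2 * 50)  # interpreted internally as a 2 x 50 matrix
xp = l21.prox(x, 0.5)        # column-wise (group) soft thresholding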