From 27ebea055ac060e30be8dacfb9e8c4429440d97c Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 16 Dec 2024 06:15:33 +0100 Subject: [PATCH] Remove the geodesic regression tutorial. (#430) * Remove the geodesic regression tutorial. * add changelog entry. * Remove menu entry. --- Changelog.md | 6 + docs/make.jl | 1 - docs/src/tutorials/GeodesicRegression.md | 520 ---------------------- src/documentation_glossary.jl | 2 +- tutorials/GeodesicRegression.qmd | 531 ----------------------- 5 files changed, 7 insertions(+), 1053 deletions(-) delete mode 100644 docs/src/tutorials/GeodesicRegression.md delete mode 100644 tutorials/GeodesicRegression.qmd diff --git a/Changelog.md b/Changelog.md index ec1943e8b2..14163c14a5 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,12 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.5] unreleased + +### Removed + +* The geodesic regression example, first because it is not correct, second because it should become part of ManoptExamples.jl once it is correct. + ## [0.5.4] - December 11, 2024 ### Added diff --git a/docs/make.jl b/docs/make.jl index 6167cc6593..fb8fb85f27 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -45,7 +45,6 @@ tutorials_menu = "Implement a solver" => "tutorials/ImplementASolver.md", "Optimize on your own manifold" => "tutorials/ImplementOwnManifold.md", "Do constrained optimization" => "tutorials/ConstrainedOptimization.md", - "Do geodesic regression" => "tutorials/GeodesicRegression.md", ] # Check whether all tutorials are rendered, issue a warning if not (and quarto if not set) all_tutorials_exist = true diff --git a/docs/src/tutorials/GeodesicRegression.md b/docs/src/tutorials/GeodesicRegression.md deleted file mode 100644 index ec1ff1e61d..0000000000 --- a/docs/src/tutorials/GeodesicRegression.md +++ /dev/null @@ -1,520 +0,0 @@ -# How to perform Geodesic Regression -Ronny Bergmann - -Geodesic regression generalizes [linear regression](https://en.wikipedia.org/wiki/Linear_regression) -to Riemannian manifolds. Let’s first phrase it informally as follows: - -> For given data points $d_1,\ldots,d_n$ on a Riemannian manifold $\mathcal M$, find -> the geodesic that “best explains” the data. - -The meaning of “best explain” has still to be clarified. We distinguish two cases: time labelled data and unlabelled data - -``` julia - using Manopt, ManifoldDiff, Manifolds, Random, Colors - using LinearAlgebra: svd - Random.seed!(42); -``` - -We use the following data, where we want to highlight one of the points. - -``` julia -n = 7 -σ = π / 8 -S = Sphere(2) -base = 1 / sqrt(2) * [1.0, 0.0, 1.0] -dir = [-0.75, 0.5, 0.75] -data_orig = [exp(S, base, dir, t) for t in range(-0.5, 0.5; length=n)] -# add noise to the points on the geodesic -data = map(p -> exp(S, p, rand(S; vector_at=p, σ=σ)), data_orig) -highlighted = 4; -``` - -![The given data](img/regression/regression_data.png) - -## Time Labeled Data - -If for each data item $d_i$ we are also given a time point $t_i\in\mathbb R$, which are pairwise different, -then we can use the least squares error to state the objective function as [Fletcher:2013](@cite) - -``` math -F(p,X) = \frac{1}{2}\sum_{i=1}^n d_{\mathcal M}^2(γ_{p,X}(t_i), d_i), -``` - -where $d_{\mathcal M}$ is the Riemannian distance and $γ_{p,X}$ is the geodesic -with $γ(0) = p$ and $\dot\gamma(0) = X$. 
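Before deriving the solution, here is a minimal sketch of how this objective can be evaluated with the tools already loaded above. It assumes the `S`, `base`, `dir`, `n`, and `data` from the setup and purely hypothetical equispaced time labels `ts`, not the optimal labels derived below.

``` julia
# sketch: evaluate F(p, X) for one candidate point/vector pair on the sphere
p_cand, X_cand = base, dir        # candidate geodesic through `base` in direction `dir`
ts = range(-0.5, 0.5; length=n)   # hypothetical time labels, one per data point
F_val = 1 / 2 * sum(
    distance(S, geodesic(S, p_cand, X_cand, ti), di)^2 for (ti, di) in zip(ts, data)
)
```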
- -For the real-valued case $\mathcal M = \mathbb R^m$ the solution $(p^*, X^*)$ is given in closed form -as follows: with $d^* = \frac{1}{n}\displaystyle\sum_{i=1}^{n}d_i$ and $t^* = \frac{1}{n}\displaystyle\sum_{i=1}^n t_i$ -we get - -``` math - X^* = \frac{\sum_{i=1}^n (d_i-d^*)(t-t^*)}{\sum_{i=1}^n (t_i-t^*)^2} -\quad\text{ and }\quad -p^* = d^* - t^*X^* -``` - -and hence the linear regression result is the line $γ_{p^*,X^*}(t) = p^* + tX^*$. - -On a Riemannian manifold we can phrase this as an optimization problem on the [tangent bundle](https://en.wikipedia.org/wiki/Tangent_bundle), which is -the disjoint union of all tangent spaces, as - -``` math -\operatorname*{arg\,min}_{(p,X) \in \mathrm{T}\mathcal M} F(p,X) -``` - -Due to linearity, the gradient of $F(p,X)$ is the sum of the single gradients of - -``` math - \frac{1}{2}d_{\mathcal M}^2\bigl(γ_{p,X}(t_i),d_i\bigr) - = \frac{1}{2}d_{\mathcal M}^2\bigl(\exp_p(t_iX),d_i\bigr) - ,\quad i∈\{1,\ldots,n\} -``` - -which can be computed using a chain rule of the squared distance and the exponential map, -see for example [BergmannGousenbourger:2018](@cite) for details or Equations (7) and (8) of [Fletcher:2013](@cite): - -``` julia -M = TangentBundle(S) -struct RegressionCost{T,S} - data::T - times::S -end -RegressionCost(data::T, times::S) where {T,S} = RegressionCost{T,S}(data, times) -function (a::RegressionCost)(M, x) - pts = [geodesic(M.manifold, x[M, :point], x[M, :vector], ti) for ti in a.times] - return 1 / 2 * sum(distance.(Ref(M.manifold), pts, a.data) .^ 2) -end -struct RegressionGradient!{T,S} - data::T - times::S -end -function RegressionGradient!(data::T, times::S) where {T,S} - return RegressionGradient!{T,S}(data, times) -end -function (a::RegressionGradient!)(M, Y, x) - pts = [geodesic(M.manifold, x[M, :point], x[M, :vector], ti) for ti in a.times] - gradients = grad_distance.(Ref(M.manifold), a.data, pts) - Y[M, :point] .= sum( - ManifoldDiff.adjoint_differential_exp_basepoint.( - Ref(M.manifold), - Ref(x[M, :point]), - [ti * x[M, :vector] for ti in a.times], - gradients, - ), - ) - Y[M, :vector] .= sum( - ManifoldDiff.adjoint_differential_exp_argument.( - Ref(M.manifold), - Ref(x[M, :point]), - [ti * x[M, :vector] for ti in a.times], - gradients, - ), - ) - return Y -end -``` - -For the Euclidean case, the result is given by the first principal component of a principal component analysis, -see [PCR](https://en.wikipedia.org/wiki/Principal_component_regression) which is given by $p^* = \frac{1}{n}\displaystyle\sum_{i=1}^n d_i$ and -the direction $X^*$ is obtained by defining the zero mean data matrix - -``` math -D = \bigl(d_1-p^*, \ldots, d_n-p^*\bigr) \in \mathbb R^{m,n} -``` - -and taking $X^*$ as an eigenvector to the largest eigenvalue of $D^{\mathrm{T}}D$. - -We can do something similar, when considering the tangent space at the (Riemannian) mean -of the data and then do a PCA on the coordinate coefficients with respect to a basis. - -``` julia -m = mean(S, data) -A = hcat( - map(x -> get_coordinates(S, m, log(S, m, x), DefaultOrthonormalBasis()), data)... -) -pca1 = get_vector(S, m, svd(A).U[:, 1], DefaultOrthonormalBasis()) -x0 = ArrayPartition(m, pca1) -``` - - ([0.6998621681746481, -0.013681674945026638, 0.7141468737791822], [0.5931302057517893, -0.5459465115717783, -0.5917254139611094]) - -The optimal “time labels” are then just the projections $t_i = ⟨d_i,X^*⟩$, $i=1,\ldots,n$. 
- -``` julia -t = map(d -> inner(S, m, pca1, log(S, m, d)), data) -``` - - 7-element Vector{Float64}: - 1.0763904949888323 - 0.4594060193318443 - -0.5030195874833682 - 0.02135686940521725 - -0.6158692507563633 - -0.24431652575028764 - -0.2259012492666664 - -And we can call the gradient descent. Note that since `gradF!` works in place of `Y`, we have to set the -`evaluation` type accordingly. - -``` julia -y = gradient_descent( - M, - RegressionCost(data, t), - RegressionGradient!(data, t), - x0; - evaluation=InplaceEvaluation(), - stepsize=ArmijoLinesearch( - M; - initial_stepsize=1.0, - contraction_factor=0.990, - sufficient_decrease=0.05, - stop_when_stepsize_less=1e-9, - ), - stopping_criterion=StopAfterIteration(200) | - StopWhenGradientNormLess(1e-8) | - StopWhenStepsizeLess(1e-9), - debug=[:Iteration, " | ", :Cost, "\n", :Stop, 50], -) -``` - - Initial | f(x): 0.142862 - # 50 | f(x): 0.141113 - # 100 | f(x): 0.141113 - # 150 | f(x): 0.141113 - # 200 | f(x): 0.141113 - The algorithm reached its maximal number of iterations (200). - - ([0.7119768725361988, 0.009463059143003981, 0.7021391482357537], [0.590008151835008, -0.5543272518659472, -0.5908038715512287]) - -For the result, we can generate and plot all involved geodesics - -``` julia -dense_t = range(-0.5, 0.5; length=100) -geo = geodesic(S, y[M, :point], y[M, :vector], dense_t) -init_geo = geodesic(S, x0[M, :point], x0[M, :vector], dense_t) -geo_pts = geodesic(S, y[M, :point], y[M, :vector], t) -geo_conn_highlighted = shortest_geodesic( - S, data[highlighted], geo_pts[highlighted], 0.5 .+ dense_t -); -``` - -![Result of Geodesic Regression](img/regression/regression_result1.png) - -In this image, together with the blue data points, you see the geodesic of the initialization in black -(evaluated on $[-\frac{1}{2},\frac{1}{2}]$), -the final point on the tangent bundle in orange, as well as the resulting regression geodesic in teal, -(on the same interval as the start) as well as small teal points indicating the time points on the geodesic corresponding to the data. -Additionally, a thin blue line indicates the geodesic between a data point and its corresponding data point on the geodesic. -While this would be the closest point in Euclidean space and hence the two directions (along the geodesic vs. to the data point) orthogonal, here we have - -``` julia -inner( - S, - geo_pts[highlighted], - log(S, geo_pts[highlighted], geo_pts[highlighted + 1]), - log(S, geo_pts[highlighted], data[highlighted]), -) -``` - - 0.002487393068917863 - -But we also started with one of the best scenarios of equally spaced points on a geodesic obstructed by noise. - -This gets worse if you start with less evenly distributed data - -``` julia -data2 = [exp(S, base, dir, t) for t in [-0.5, -0.49, -0.48, 0.1, 0.48, 0.49, 0.5]] -data2 = map(p -> exp(S, p, rand(S; vector_at=p, σ=σ / 2)), data2) -m2 = mean(S, data2) -A2 = hcat( - map(x -> get_coordinates(S, m, log(S, m, x), DefaultOrthonormalBasis()), data2)... 
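    # the log coordinates are taken at `m`, the mean of the original `data`;
    # the mean `m2 = mean(S, data2)` from above is used only for the time labels `t2` below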
-) -pca2 = get_vector(S, m, svd(A2).U[:, 1], DefaultOrthonormalBasis()) -x1 = ArrayPartition(m, pca2) -t2 = map(d -> inner(S, m2, pca2, log(S, m2, d)), data2) -``` - - 7-element Vector{Float64}: - 0.8226008307680276 - 0.470952643700004 - 0.7974195537403082 - 0.01533949241264346 - -0.6546705405852389 - -0.8913273825362389 - -0.5775954445730889 - -then we run again - -``` julia -y2 = gradient_descent( - M, - RegressionCost(data2, t2), - RegressionGradient!(data2, t2), - x1; - evaluation=InplaceEvaluation(), - stepsize=ArmijoLinesearch( - M; - initial_stepsize=1.0, - contraction_factor=0.990, - sufficient_decrease=0.05, - stop_when_stepsize_less=1e-9, - ), - stopping_criterion=StopAfterIteration(200) | - StopWhenGradientNormLess(1e-8) | - StopWhenStepsizeLess(1e-9), - debug=[:Iteration, " | ", :Cost, "\n", :Stop, 3], -); -``` - - Initial | f(x): 0.089844 - # 3 | f(x): 0.085364 - # 6 | f(x): 0.085364 - # 9 | f(x): 0.085364 - # 12 | f(x): 0.085364 - # 15 | f(x): 0.085364 - # 18 | f(x): 0.085364 - # 21 | f(x): 0.085364 - # 24 | f(x): 0.085364 - # 27 | f(x): 0.085364 - # 30 | f(x): 0.085364 - # 33 | f(x): 0.085364 - # 36 | f(x): 0.085364 - # 39 | f(x): 0.085364 - # 42 | f(x): 0.085364 - # 45 | f(x): 0.085364 - # 48 | f(x): 0.085364 - # 51 | f(x): 0.085364 - # 54 | f(x): 0.085364 - # 57 | f(x): 0.085364 - # 60 | f(x): 0.085364 - # 63 | f(x): 0.085364 - # 66 | f(x): 0.085364 - # 69 | f(x): 0.085364 - # 72 | f(x): 0.085364 - # 75 | f(x): 0.085364 - # 78 | f(x): 0.085364 - # 81 | f(x): 0.085364 - # 84 | f(x): 0.085364 - # 87 | f(x): 0.085364 - # 90 | f(x): 0.085364 - # 93 | f(x): 0.085364 - # 96 | f(x): 0.085364 - # 99 | f(x): 0.085364 - # 102 | f(x): 0.085364 - # 105 | f(x): 0.085364 - # 108 | f(x): 0.085364 - # 111 | f(x): 0.085364 - # 114 | f(x): 0.085364 - # 117 | f(x): 0.085364 - # 120 | f(x): 0.085364 - # 123 | f(x): 0.085364 - # 126 | f(x): 0.085364 - # 129 | f(x): 0.085364 - # 132 | f(x): 0.085364 - # 135 | f(x): 0.085364 - # 138 | f(x): 0.085364 - # 141 | f(x): 0.085364 - # 144 | f(x): 0.085364 - # 147 | f(x): 0.085364 - # 150 | f(x): 0.085364 - # 153 | f(x): 0.085364 - # 156 | f(x): 0.085364 - # 159 | f(x): 0.085364 - # 162 | f(x): 0.085364 - # 165 | f(x): 0.085364 - # 168 | f(x): 0.085364 - # 171 | f(x): 0.085364 - # 174 | f(x): 0.085364 - # 177 | f(x): 0.085364 - # 180 | f(x): 0.085364 - # 183 | f(x): 0.085364 - # 186 | f(x): 0.085364 - # 189 | f(x): 0.085364 - # 192 | f(x): 0.085364 - # 195 | f(x): 0.085364 - # 198 | f(x): 0.085364 - The algorithm reached its maximal number of iterations (200). - -For plotting we again generate all data - -``` julia -geo2 = geodesic(S, y2[M, :point], y2[M, :vector], dense_t) -init_geo2 = geodesic(S, x1[M, :point], x1[M, :vector], dense_t) -geo_pts2 = geodesic(S, y2[M, :point], y2[M, :vector], t2) -geo_conn_highlighted2 = shortest_geodesic( - S, data2[highlighted], geo_pts2[highlighted], 0.5 .+ dense_t -); -``` - -![A second result with different time points](img/regression/regression_result2.png) - -## Unlabeled Data - -If we are not given time points $t_i$, then the optimization problem extends, informally speaking, -to also finding the “best fitting” (in the sense of smallest error). -To formalize, the objective function here reads - -``` math -F(p, X, t) = \frac{1}{2}\sum_{i=1}^n d_{\mathcal M}^2(γ_{p,X}(t_i), d_i), -``` - -where $t = (t_1,\ldots,t_n) \in \mathbb R^n$ is now an additional parameter of the objective function. 
-We write $F_1(p, X)$ to refer to the function on the tangent bundle for fixed values of $t$ (as the one in the last part) -and $F_2(t)$ for the function $F(p, X, t)$ as a function in $t$ with fixed values $(p, X)$. - -For the Euclidean case, there is no necessity to optimize with respect to $t$, as we saw -above for the initialization of the fixed time points. - -On a Riemannian manifold this can be stated as a problem on the product manifold $\mathcal N = \mathrm{T}\mathcal M \times \mathbb R^n$, i.e. - -``` julia -N = M × Euclidean(length(t2)) -``` - - ProductManifold with 2 submanifolds: - TangentBundle(Sphere(2, ℝ)) - Euclidean(7; field=ℝ) - -``` math - \operatorname*{arg\,min}_{\bigl((p,X),t\bigr)\in\mathcal N} F(p, X, t). -``` - -In this tutorial we present an approach to solve this using an alternating gradient descent scheme. -To be precise, we define the cost function now on the product manifold - -``` julia -struct RegressionCost2{T} - data::T -end -RegressionCost2(data::T) where {T} = RegressionCost2{T}(data) -function (a::RegressionCost2)(N, x) - TM = N[1] - pts = [ - geodesic(TM.manifold, x[N, 1][TM, :point], x[N, 1][TM, :vector], ti) for - ti in x[N, 2] - ] - return 1 / 2 * sum(distance.(Ref(TM.manifold), pts, a.data) .^ 2) -end -``` - -The gradient in two parts, namely (a) the same gradient as before w.r.t. $(p,X) ∈ T\mathcal M$, -just now with a fixed `t` in mind for the second component of the product manifold $\mathcal N$ - -``` julia -struct RegressionGradient2a!{T} - data::T -end -RegressionGradient2a!(data::T) where {T} = RegressionGradient2a!{T}(data) -function (a::RegressionGradient2a!)(N, Y, x) - TM = N[1] - p = x[N, 1] - pts = [geodesic(TM.manifold, p[TM, :point], p[TM, :vector], ti) for ti in x[N, 2]] - gradients = Manopt.grad_distance.(Ref(TM.manifold), a.data, pts) - Y[TM, :point] .= sum( - ManifoldDiff.adjoint_differential_exp_basepoint.( - Ref(TM.manifold), - Ref(p[TM, :point]), - [ti * p[TM, :vector] for ti in x[N, 2]], - gradients, - ), - ) - Y[TM, :vector] .= sum( - ManifoldDiff.adjoint_differential_exp_argument.( - Ref(TM.manifold), - Ref(p[TM, :point]), - [ti * p[TM, :vector] for ti in x[N, 2]], - gradients, - ), - ) - return Y -end -``` - -Finally, we additionally look for a fixed point $x=(p,X) ∈ \mathrm{T}\mathcal M$ at -the gradient with respect to $t∈\mathbb R^n$, the second component, which is given by - -``` math - (\operatorname{grad}F_2(t))_i - = - ⟨\dot γ_{p,X}(t_i), \log_{γ_{p,X}(t_i)}d_i⟩_{γ_{p,X}(t_i)}, i = 1, \ldots, n. 
-``` - -``` julia -struct RegressionGradient2b!{T} - data::T -end -RegressionGradient2b!(data::T) where {T} = RegressionGradient2b!{T}(data) -function (a::RegressionGradient2b!)(N, Y, x) - TM = N[1] - p = x[N, 1] - pts = [geodesic(TM.manifold, p[TM, :point], p[TM, :vector], ti) for ti in x[N, 2]] - logs = log.(Ref(TM.manifold), pts, a.data) - pt = map( - d -> vector_transport_to(TM.manifold, p[TM, :point], p[TM, :vector], d), pts - ) - Y .= -inner.(Ref(TM.manifold), pts, logs, pt) - return Y -end -``` - -We can reuse the computed initial values from before, just that now we are on a product manifold - -``` julia -x2 = ArrayPartition(x1, t2) -F3 = RegressionCost2(data2) -gradF3_vector = [RegressionGradient2a!(data2), RegressionGradient2b!(data2)]; -``` - -and we run the algorithm - -``` julia -y3 = alternating_gradient_descent( - N, - F3, - gradF3_vector, - x2; - evaluation=InplaceEvaluation(), - debug=[:Iteration, " | ", :Cost, "\n", :Stop, 50], - stepsize=ArmijoLinesearch( - M; - contraction_factor=0.999, - sufficient_decrease=0.066, - stop_when_stepsize_less=1e-11, - retraction_method=ProductRetraction(SasakiRetraction(2), ExponentialRetraction()), - ), - inner_iterations=1, -) -``` - - Initial | f(x): 0.089844 - # 50 | f(x): 0.091097 - # 100 | f(x): 0.091097 - The algorithm reached its maximal number of iterations (100). - - (ArrayPartition{Float64, Tuple{Vector{Float64}, Vector{Float64}}}(([0.750222090700214, 0.031464227399200885, 0.6604368380243274], [0.6636489079535082, -0.3497538263293046, -0.737208025444054])), [0.7965909273713889, 0.43402264218923514, 0.755822122896529, 0.001059348203453764, -0.6421135044471217, -0.8635572995105818, -0.5546338813212247]) - -which we render can collect into an image creating the geodesics again - -``` julia -geo3 = geodesic(S, y3[N, 1][M, :point], y3[N, 1][M, :vector], dense_t) -init_geo3 = geodesic(S, x1[M, :point], x1[M, :vector], dense_t) -geo_pts3 = geodesic(S, y3[N, 1][M, :point], y3[N, 1][M, :vector], y3[N, 2]) -t3 = y3[N, 2] -geo_conns = shortest_geodesic.(Ref(S), data2, geo_pts3, Ref(0.5 .+ 4*dense_t)); -``` - -which yields - -![The third result](img/regression/regression_result3.png) - -Note that the geodesics from the data to the regression geodesic meet at a nearly orthogonal angle. - -**Acknowledgement.** Parts of this tutorial are based on the bachelor thesis of -[Jeremias Arf](https://orcid.org/0000-0003-3765-0130). - -## Literature - -```@bibliography -Pages = ["GeodesicRegression.md"] -Canonical=false -``` - diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 5cf07ee491..e2f88d9483 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -99,7 +99,7 @@ _tex(args...; kwargs...) = glossary(:LaTeX, args...; kwargs...) 
define!(:Math, :distance, raw"\mathrm{d}") define!(:Math, :M, (; M="M") -> _math(:Manifold, :symbol; M=M)) define!(:Math, :Manifold, :symbol, (; M="M") -> _tex(:Cal, M)) -define!(:Math, :Manifold, :descrption, "the Riemannian manifold") +define!(:Math, :Manifold, :description, "the Riemannian manifold") define!(:Math, :Iterate, (; p="p", k="k") -> "$(p)^{($(k))}") define!( :Math, diff --git a/tutorials/GeodesicRegression.qmd b/tutorials/GeodesicRegression.qmd deleted file mode 100644 index 4b410eef04..0000000000 --- a/tutorials/GeodesicRegression.qmd +++ /dev/null @@ -1,531 +0,0 @@ ---- -title: "How to perform geodesic regression" -author: "Ronny Bergmann" ---- - -Geodesic regression generalizes [linear regression](https://en.wikipedia.org/wiki/Linear_regression) -to Riemannian manifolds. Let's first phrase it informally as follows: - -> For given data points $d_1,\ldots,d_n$ on a Riemannian manifold $\mathcal M$, find -> the geodesic that “best explains” the data. - -The meaning of “best explain” has still to be clarified. We distinguish two cases: time labelled data and unlabelled data - -```{julia} -#| echo: false -#| code-fold: true -#| output: false -using Pkg; -cd(@__DIR__) -Pkg.activate("."); # for reproducibility use the local tutorial environment. -img_folder = "img/regression"; -mkpath(img_folder) -``` -```{julia} -using Manopt, ManifoldDiff, Manifolds, Random, Colors, RecursiveArrayTools -using LinearAlgebra: svd -using ManifoldDiff: grad_distance -Random.seed!(42);```{julia} -#| echo: false -#| code-fold: true -#| output: false -# define the colors for the plots -black = RGBA{Float64}(colorant"#000000") -TolVibrantOrange = RGBA{Float64}(colorant"#EE7733") -TolVibrantBlue = RGBA{Float64}(colorant"#0077BB") -TolVibrantTeal = RGBA{Float64}(colorant"#009988") -TolVibrantMagenta = RGBA{Float64}(colorant"#EE3377") -TolVibrantCyan = RGBA{Float64}(colorant"#33BBEE"); -render_size = 0 -``` - -We use the following data, where we want to highlight one of the points. - -```{julia} -n = 7 -σ = π / 8 -S = Sphere(2) -base = 1 / sqrt(2) * [1.0, 0.0, 1.0] -dir = [-0.75, 0.5, 0.75] -data_orig = [exp(S, base, dir, t) for t in range(-0.5, 0.5; length=n)] -# add noise to the points on the geodesic -data = map(p -> exp(S, p, rand(S; vector_at=p, σ=σ)), data_orig) -highlighted = 4; -``` -```{julia} -#| echo: false -#| code-fold: true -#| output: false -# for the plots we choose one point to hightlight. -(highlighted > n - 1) && error( - "Please choose a highlighted point from {1,...,$(n-1)} – you set it to highlighted.", -) -asymptote_export_S2_signals( - img_folder * "/regression_data.asy"; - points=[data], - colors=Dict(:points => [TolVibrantBlue]), - dot_size=9.0, - camera_position=(1.0, 0.5, 0.5), - size=(600,0), -); -render_asymptote(img_folder * "/regression_data.asy"; render=render_size); -``` - -![The given data](img/regression/regression_data.png) - -## Time labeled data - -If for each data item $d_i$ we are also given a time point $t_i∈ℝ$, which are pairwise different, -then we can use the least squares error to state the objective function as [Fletcher:2013](@cite) - -```math -F(p,X) = \frac{1}{2}\sum_{i=1}^n d_{\mathcal M}^2(γ_{p,X}(t_i), d_i), -``` - -where $d_{\mathcal M}$ is the Riemannian distance and $γ_{p,X}$ is the geodesic -with $γ(0) = p$ and $\dot\gamma(0) = X$. 
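A minimal sketch of evaluating this objective directly, assuming the `S`, `base`, `dir`, `n`, and `data` from the setup above together with purely hypothetical equispaced time labels `ts` (not the optimal labels derived below):

```{julia}
# sketch: evaluate F(p, X) for one candidate point/vector pair on the sphere
p_cand, X_cand = base, dir        # candidate geodesic through `base` in direction `dir`
ts = range(-0.5, 0.5; length=n)   # hypothetical time labels, one per data point
F_val = 1 / 2 * sum(
    distance(S, geodesic(S, p_cand, X_cand, ti), di)^2 for (ti, di) in zip(ts, data)
)
```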
- -For the real-valued case $\mathcal M = ℝ^m$ the solution $(p^*, X^*)$ is given in closed form -as follows: with $d^* = \frac{1}{n}\displaystyle\sum_{i=1}^{n}d_i$ and $t^* = \frac{1}{n}\displaystyle\sum_{i=1}^n t_i$ -we get - -```math - X^* = \frac{\sum_{i=1}^n (d_i-d^*)(t-t^*)}{\sum_{i=1}^n (t_i-t^*)^2} -\quad\text{ and }\quad -p^* = d^* - t^*X^* -``` - -and hence the linear regression result is the line $γ_{p^*,X^*}(t) = p^* + tX^*$. - -On a Riemannian manifold we can phrase this as an optimization problem on the [tangent bundle](https://en.wikipedia.org/wiki/Tangent_bundle), which is -the disjoint union of all tangent spaces, as - -```math -\operatorname*{arg\,min}_{(p,X) ∈ \mathrm{T}\mathcal M} F(p,X) -``` - -Due to linearity, the gradient of $F(p,X)$ is the sum of the single gradients of - -```math - \frac{1}{2}d_{\mathcal M}^2\bigl(γ_{p,X}(t_i),d_i\bigr) - = \frac{1}{2}d_{\mathcal M}^2\bigl(\exp_p(t_iX),d_i\bigr) - ,\quad i∈\{1,\ldots,n\} -``` - -which can be computed using a chain rule of the squared distance and the exponential map, -see for example [BergmannGousenbourger:2018](@cite) for details or Equations (7) and (8) of [Fletcher:2013](@cite): - -```{julia} -M = TangentBundle(S) -struct RegressionCost{T,S} - data::T - times::S -end -RegressionCost(data::T, times::S) where {T,S} = RegressionCost{T,S}(data, times) -function (a::RegressionCost)(M, x) - pts = [geodesic(M.manifold, x[M, :point], x[M, :vector], ti) for ti in a.times] - return 1 / 2 * sum(distance.(Ref(M.manifold), pts, a.data) .^ 2) -end -struct RegressionGradient!{T,S} - data::T - times::S -end -function RegressionGradient!(data::T, times::S) where {T,S} - return RegressionGradient!{T,S}(data, times) -end -function (a::RegressionGradient!)(M, Y, x) - pts = [geodesic(M.manifold, x[M, :point], x[M, :vector], ti) for ti in a.times] - gradients = grad_distance.(Ref(M.manifold), a.data, pts) - Y[M, :point] .= sum( - ManifoldDiff.adjoint_differential_exp_basepoint.( - Ref(M.manifold), - Ref(x[M, :point]), - [ti * x[M, :vector] for ti in a.times], - gradients, - ), - ) - Y[M, :vector] .= sum( - ManifoldDiff.adjoint_differential_exp_argument.( - Ref(M.manifold), - Ref(x[M, :point]), - [ti * x[M, :vector] for ti in a.times], - gradients, - ), - ) - return Y -end -``` - -For the Euclidean case, the result is given by the first principal component of a principal component analysis, -see [PCR](https://en.wikipedia.org/wiki/Principal_component_regression) which is given by $p^* = \frac{1}{n}\displaystyle\sum_{i=1}^n d_i$ and -the direction $X^*$ is obtained by defining the zero mean data matrix - -```math -D = \bigl(d_1-p^*, \ldots, d_n-p^*\bigr) ∈ ℝ^{m,n} -``` - -and taking $X^*$ as an eigenvector to the largest eigenvalue of $D^{\mathrm{T}}D$. - -We can do something similar, when considering the tangent space at the (Riemannian) mean -of the data and then do a PCA on the coordinate coefficients with respect to a basis. - - -```{julia} -m = mean(S, data) -A = hcat( - map(x -> get_coordinates(S, m, log(S, m, x), DefaultOrthonormalBasis()), data)... -) -pca1 = get_vector(S, m, svd(A).U[:, 1], DefaultOrthonormalBasis()) -x0 = ArrayPartition(m, pca1) -``` - -The optimal “time labels” are then just the projections $t_i$ $= ⟨d_i,X^*⟩$, $i=1,\ldots,n$. - -```{julia} -t = map(d -> inner(S, m, pca1, log(S, m, d)), data) -``` - -And we can call the gradient descent. Note that since `gradF!` works in place of `Y`, we have to set the -`evaluation` type accordingly. 
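As a small sketch of the allocating alternative, the in-place functor can be wrapped in a function that allocates its result; the names `regression_grad!` and `regression_gradient` below are only illustrative. With such a wrapper the `evaluation` keyword could be left at its default `AllocatingEvaluation`.

```{julia}
# illustrative allocating wrapper around the in-place gradient functor
regression_grad! = RegressionGradient!(data, t)
function regression_gradient(M, x)
    Y = zero_vector(M, x)          # allocate a tangent vector at x on the tangent bundle
    return regression_grad!(M, Y, x)
end
```

The in-place variant used below avoids allocating a new tangent vector in every iteration, which is why `evaluation=InplaceEvaluation()` is passed.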
- -```{julia} -y = gradient_descent( - M, - RegressionCost(data, t), - RegressionGradient!(data, t), - x0; - evaluation=InplaceEvaluation(), - stepsize=ArmijoLinesearch(; - initial_stepsize=1.0, - contraction_factor=0.990, - sufficient_decrease=0.05, - stop_when_stepsize_less=1e-9, - ), - stopping_criterion=StopAfterIteration(200) | - StopWhenGradientNormLess(1e-8) | - StopWhenStepsizeLess(1e-9), - debug=[:Iteration, " | ", :Cost, "\n", :Stop, 50], -) -``` - -For the result, we can generate and plot all involved geodesics -```{julia} -dense_t = range(-0.5, 0.5; length=100) -geo = geodesic(S, y[M, :point], y[M, :vector], dense_t) -init_geo = geodesic(S, x0[M, :point], x0[M, :vector], dense_t) -geo_pts = geodesic(S, y[M, :point], y[M, :vector], t) -geo_conn_highlighted = shortest_geodesic( - S, data[highlighted], geo_pts[highlighted], 0.5 .+ dense_t -); -``` - -```{julia} -#| echo: false -#| code-fold: true -#| output: false -asymptote_export_S2_signals( - img_folder * "/regression_result1.asy"; - points=[data, [y[M, :point]], geo_pts], - curves=[init_geo, geo, geo_conn_highlighted], - tangent_vectors=[[Tuple([y[M, :point], y[M, :vector]])]], - colors=Dict( - :curves => [black, TolVibrantTeal, TolVibrantBlue], - :points => [TolVibrantBlue, TolVibrantOrange, TolVibrantTeal], - :tvectors => [TolVibrantOrange], - ), - dot_sizes=[9.0, 9.0, 6.0], - line_widths=[1.25, 1.5, 1.25, 1.5], - camera_position=(1.0, 0.5, 0.5), - size=(600,0), -); -render_asymptote(img_folder * "/regression_result1.asy"; render=render_size); -``` - -![Result of Geodesic Regression](img/regression/regression_result1.png) - -In this image, together with the blue data points, you see the geodesic of the initialization in black -(evaluated on $[-\frac{1}{2},\frac{1}{2}]$), -the final point on the tangent bundle in orange, as well as the resulting regression geodesic in teal, -(on the same interval as the start) as well as small teal points indicating the time points on the geodesic corresponding to the data. -Additionally, a thin blue line indicates the geodesic between a data point and its corresponding data point on the geodesic. -While this would be the closest point in Euclidean space and hence the two directions (along the geodesic vs. to the data point) orthogonal, here we have - -```{julia} -inner( - S, - geo_pts[highlighted], - log(S, geo_pts[highlighted], geo_pts[highlighted + 1]), - log(S, geo_pts[highlighted], data[highlighted]), -) -``` - -But we also started with one of the best scenarios of equally spaced points on a geodesic obstructed by noise. - -This gets worse if you start with less evenly distributed data - -```{julia} -data2 = [exp(S, base, dir, t) for t in [-0.5, -0.49, -0.48, 0.1, 0.48, 0.49, 0.5]] -data2 = map(p -> exp(S, p, rand(S; vector_at=p, σ=σ / 2)), data2) -m2 = mean(S, data2) -A2 = hcat( - map(x -> get_coordinates(S, m, log(S, m, x), DefaultOrthonormalBasis()), data2)... 
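    # the log coordinates are taken at `m`, the mean of the original `data`;
    # the mean `m2 = mean(S, data2)` from above is used only for the time labels `t2` below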
-) -pca2 = get_vector(S, m, svd(A2).U[:, 1], DefaultOrthonormalBasis()) -x1 = ArrayPartition(m, pca2) -t2 = map(d -> inner(S, m2, pca2, log(S, m2, d)), data2) -``` - -then we run again - -```{julia} -y2 = gradient_descent( - M, - RegressionCost(data2, t2), - RegressionGradient!(data2, t2), - x1; - evaluation=InplaceEvaluation(), - stepsize=ArmijoLinesearch(; - initial_stepsize=1.0, - contraction_factor=0.990, - sufficient_decrease=0.05, - stop_when_stepsize_less=1e-9, - ), - stopping_criterion=StopAfterIteration(200) | - StopWhenGradientNormLess(1e-8) | - StopWhenStepsizeLess(1e-9), - debug=[:Iteration, " | ", :Cost, "\n", :Stop, 3], -); -``` - -For plotting we again generate all data - -```{julia} -geo2 = geodesic(S, y2[M, :point], y2[M, :vector], dense_t) -init_geo2 = geodesic(S, x1[M, :point], x1[M, :vector], dense_t) -geo_pts2 = geodesic(S, y2[M, :point], y2[M, :vector], t2) -geo_conn_highlighted2 = shortest_geodesic( - S, data2[highlighted], geo_pts2[highlighted], 0.5 .+ dense_t -); -``` -```{julia} -#| echo: false -#| code-fold: true -#| output: false -asymptote_export_S2_signals( - img_folder * "/regression_result2.asy"; - points=[data2, [y2[M, :point]], geo_pts2], - curves=[init_geo2, geo2, geo_conn_highlighted2], - tangent_vectors=[[Tuple([y2[M, :point], y2[M, :vector]])]], - colors=Dict( - :curves => [black, TolVibrantTeal, TolVibrantCyan], - :points => [TolVibrantBlue, TolVibrantOrange, TolVibrantTeal], - :tvectors => [TolVibrantOrange], - ), - dot_sizes=[9.0, 9.0, 6.0], - line_widths=[1.25, 1.5, 1.25, 1.5], - camera_position=(1.0, 0.5, 0.5), - size=(600, 0), -); -render_asymptote(img_folder * "/regression_result2.asy"; render=render_size); -``` - -![A second result with different time points](img/regression/regression_result2.png) - -## Unlabeled data - -If we are not given time points $t_i$, then the optimization problem extends, informally speaking, -to also finding the “best fitting” (in the sense of smallest error). -To formalize, the objective function here reads - - -```math -F(p, X, t) = \frac{1}{2}\sum_{i=1}^n d_{\mathcal M}^2(γ_{p,X}(t_i), d_i), -``` - -where $t = (t_1,\ldots,t_n) ∈ ℝ^n$ is now an additional parameter of the objective function. -We write $F_1(p, X)$ to refer to the function on the tangent bundle for fixed values of $t$ (as the one in the last part) -and $F_2(t)$ for the function $F(p, X, t)$ as a function in $t$ with fixed values $(p, X)$. - -For the Euclidean case, there is no necessity to optimize with respect to $t$, as we saw before for the initialization of the fixed time points. - -On a Riemannian manifold this can be stated as a problem on the product manifold $\mathcal N = \mathrm{T}\mathcal M \times ℝ^n$, or in code - -```{julia} -N = M × Euclidean(length(t2)) -``` - -```math - \operatorname*{arg\,min}_{\bigl((p,X),t\bigr)∈\mathcal N} F(p, X, t). -``` - -In this tutorial we present an approach to solve this using an alternating gradient descent scheme. -To be precise, we define the cost function now on the product manifold - -```{julia} -struct RegressionCost2{T} - data::T -end -RegressionCost2(data::T) where {T} = RegressionCost2{T}(data) -function (a::RegressionCost2)(N, x) - TM = N[1] - pts = [ - geodesic(TM.manifold, x[N, 1][TM, :point], x[N, 1][TM, :vector], ti) for - ti in x[N, 2] - ] - return 1 / 2 * sum(distance.(Ref(TM.manifold), pts, a.data) .^ 2) -end -``` - -The gradient in two parts, namely (a) the same gradient as before w.r.t. 
$(p,X) ∈ T\mathcal M$, -just now with a fixed `t` in mind for the second component of the product manifold $\mathcal N$ - - -```{julia} -struct RegressionGradient2a!{T} - data::T -end -RegressionGradient2a!(data::T) where {T} = RegressionGradient2a!{T}(data) -function (a::RegressionGradient2a!)(N, Y, x) - TM = N[1] - p = x[N, 1] - pts = [geodesic(TM.manifold, p[TM, :point], p[TM, :vector], ti) for ti in x[N, 2]] - gradients = grad_distance.(Ref(TM.manifold), a.data, pts) - Y[TM, :point] .= sum( - ManifoldDiff.adjoint_differential_exp_basepoint.( - Ref(TM.manifold), - Ref(p[TM, :point]), - [ti * p[TM, :vector] for ti in x[N, 2]], - gradients, - ), - ) - Y[TM, :vector] .= sum( - ManifoldDiff.adjoint_differential_exp_argument.( - Ref(TM.manifold), - Ref(p[TM, :point]), - [ti * p[TM, :vector] for ti in x[N, 2]], - gradients, - ), - ) - return Y -end -``` - -Finally, we additionally look for a fixed point $x=(p,X) ∈ \mathrm{T}\mathcal M$ at -the gradient with respect to $t∈ℝ^n$, the second component, which is given by - -```math - (\operatorname{grad}F_2(t))_i - = - ⟨\dot γ_{p,X}(t_i), \log_{γ_{p,X}(t_i)}d_i⟩_{γ_{p,X}(t_i)}, i = 1, \ldots, n. -``` - -```{julia} -struct RegressionGradient2b!{T} - data::T -end -RegressionGradient2b!(data::T) where {T} = RegressionGradient2b!{T}(data) -function (a::RegressionGradient2b!)(N, Y, x) - TM = N[1] - p = x[N, 1] - pts = [geodesic(TM.manifold, p[TM, :point], p[TM, :vector], ti) for ti in x[N, 2]] - logs = log.(Ref(TM.manifold), pts, a.data) - pt = map( - d -> vector_transport_to(TM.manifold, p[TM, :point], p[TM, :vector], d), pts - ) - Y .= -inner.(Ref(TM.manifold), pts, logs, pt) - return Y -end -``` - -We can reuse the computed initial values from before, just that now we are on a product manifold - -```{julia} -x2 = ArrayPartition(x1, t2) -F3 = RegressionCost2(data2) -gradF3_vector = [RegressionGradient2a!(data2), RegressionGradient2b!(data2)]; -``` - -and we run the algorithm - -```{julia} -y3 = alternating_gradient_descent( - N, - F3, - gradF3_vector, - x2; - evaluation=InplaceEvaluation(), - debug=[:Iteration, " | ", :Cost, "\n", :Stop, 50], - stepsize=ArmijoLinesearch(; - contraction_factor=0.999, - sufficient_decrease=0.066, - stop_when_stepsize_less=1e-11, - retraction_method=ProductRetraction(SasakiRetraction(2), ExponentialRetraction()), - ), - inner_iterations=1, -) -``` - -which we render can collect into an image creating the geodesics again - -```{julia} -geo3 = geodesic(S, y3[N, 1][M, :point], y3[N, 1][M, :vector], dense_t) -init_geo3 = geodesic(S, x1[M, :point], x1[M, :vector], dense_t) -geo_pts3 = geodesic(S, y3[N, 1][M, :point], y3[N, 1][M, :vector], y3[N, 2]) -t3 = y3[N, 2] -geo_conns = shortest_geodesic.(Ref(S), data2, geo_pts3, Ref(0.5 .+ 4*dense_t)); -``` - -which yields - -```{julia} -#| echo: false -#| code-fold: true -#| output: false -asymptote_export_S2_signals( - img_folder * "/regression_result3.asy"; - points=[data2, [y3[N, 1][M, :point]], geo_pts3], - curves=[init_geo3, geo3, geo_conns...], - tangent_vectors=[[Tuple([y3[N, 1][M, :point], y3[N, 1][M, :vector]])]], - colors=Dict( - :curves => - [black, TolVibrantTeal, repeat([TolVibrantCyan], length(geo_conns))...], - :points => [TolVibrantBlue, TolVibrantOrange, TolVibrantTeal], - :tvectors => [TolVibrantOrange], - ), - dot_sizes=[9.0, 6.0, 9.0], - line_widths=[1.25, 1.5, repeat([1.25], length(geo_conns))..., 1.5], - camera_position=(1.0, 0.5, 0.5), - size=(600, 0), -); -render_asymptote(img_folder * "/regression_result3.asy"; render=render_size); -``` - -![The third 
result](img/regression/regression_result3.png) - -Note that the geodesics from the data to the regression geodesic meet at a nearly orthogonal angle. - -**Acknowledgement.** Parts of this tutorial are based on the bachelor thesis of -[Jeremias Arf](https://orcid.org/0000-0003-3765-0130). - -## Technical details - -This tutorial is cached. It was last run on the following package versions. - -```{julia} -#| code-fold: true -using Pkg -Pkg.status() -``` -```{julia} -#| code-fold: true -using Dates -now() -``` - -## Literature - -````{=commonmark} -```@bibliography -Pages = ["GeodesicRegression.md"] -Canonical=false -``` -````