From 5834003b8f44b95bdc2cecbf98abd16ea6f3770e Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 5 Oct 2024 17:08:17 +0200 Subject: [PATCH 01/34] For today just a bit of sketching of ideas and modelling. --- Changelog.md | 10 ++++++++ src/documentation_glossary.jl | 31 ++++++++++++++++++++--- src/plans/nonlinear_least_squares_plan.jl | 5 ++-- src/plans/vectorial_plan.jl | 1 - src/solvers/LevenbergMarquardt.jl | 12 ++++----- 5 files changed, 46 insertions(+), 13 deletions(-) diff --git a/Changelog.md b/Changelog.md index f149948a6c..4be564945b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,16 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.5.3] - unreleased + +### Added + +* (planned) a robust variant of Levenberg Marquard + +### Changed + +* (planned) Levenberg-Marquardt now internally uses a `VectorGradientFunction` and hence also accepts a “gradient matrix” and a vector of gradient functions instead of a Jacobian (in a basis of the tangent vectors). 
+ ## [0.5.2] – October 5, 2024 ### Added diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 55b9aa4427..da42da0fb9 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -50,6 +50,7 @@ end # --- # LaTeX +define!(:LaTeX, :abs, (v) -> raw"\lvert " * "$v" * raw" \rvert") define!(:LaTeX, :argmin, raw"\operatorname{arg\,min}") define!(:LaTeX, :ast, raw"\ast") define!(:LaTeX, :bar, (letter) -> raw"\bar" * "$(letter)") @@ -86,6 +87,7 @@ define!(:LaTeX, :quad, raw"\quad") define!(:LaTeX, :reflect, raw"\operatorname{refl}") define!(:LaTeX, :retr, raw"\operatorname{retr}") define!(:LaTeX, :subgrad, raw"∂") +define!(:LaTeX, :sum, raw"\sum") define!(:LaTeX, :text, (letter) -> raw"\text{" * "$letter" * "}") define!(:LaTeX, :vert, raw"\vert") define!(:LaTeX, :widehat, (letter) -> raw"\widehat{" * "$letter" * "}") @@ -98,6 +100,7 @@ _tex(args...; kwargs...) = glossary(:LaTeX, args...; kwargs...) define!(:Math, :M, (; M="M") -> _math(:Manifold, :symbol; M=M)) define!(:Math, :Manifold, :symbol, (; M="M") -> _tex(:Cal, M)) define!(:Math, :Manifold, :descrption, "the Riemannian manifold") +define!(:Math, :M, (; M="M") -> _math(:Manifold, :symbol; M=M)) define!(:Math, :Iterate, (; p="p", k="k") -> "$(p)^{($(k))}") define!( :Math, @@ -219,13 +222,15 @@ define!( # # # Problems +_problem(args...; kwargs...) = glossary(:Problem, args...; kwargs...) 
+ define!( :Problem, :Constrained, (; M="M", p="p") -> """ ```math \\begin{aligned} -\\min_{$p ∈ $(_tex(:Cal, M))} & f($p)\\\\ +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} & f($p)\\\\ $(_tex(:text, "subject to"))$(_tex(:quad))&g_i($p) ≤ 0 \\quad $(_tex(:text, " for ")) i= 1, …, m,\\\\ \\quad & h_j($p)=0 \\quad $(_tex(:text, " for ")) j=1,…,n, \\end{aligned} @@ -235,9 +240,29 @@ $(_tex(:text, "subject to"))$(_tex(:quad))&g_i($p) ≤ 0 \\quad $(_tex(:text, " define!( :Problem, :Default, - (; M="M", p="p") -> "\n```math\n$(_tex(:argmin))_{$p ∈ $(_tex(:Cal, M))} f($p)\n```\n", + (; M="M", p="p") -> """ +```math +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} f($p) +``` +""", ) -_problem(args...; kwargs...) = glossary(:Problem, args...; kwargs...) +define!( + :Problem, + :NonLinearLeastSquares, + (; M="M", p="p") -> """ +```math +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2))$(_tex(:sum))_{i=1}^n ρ_i$(_tex(:bigl))( $(_tex(:norm, "f_i($p)"))^2 $(_tex(:bigr))) +``` + +where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) \to ℝ``, +and each component function is continuously differentiable. +The functions ``ρ_i: ℝ → ℝ`` can be seen as regularisers of the single least squares terms +and are twice continuously differentiable. + +For the case ``ρ_i(x) = x`` this yields the Nonlinear Least Squares objective +""", +) + # # # Stopping Criteria diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 007353d529..7f27bdd936 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -2,8 +2,9 @@ @doc """ NonlinearLeastSquaresObjective{T<:AbstractEvaluationType} <: AbstractManifoldObjective{T} -A type for nonlinear least squares problems. -`T` is a [`AbstractEvaluationType`](@ref) for the `F` and Jacobian functions. 
+An objective to model the nonlinear least squares problem + +$(_problem(:NonLinearLeastSquares)) Specify a nonlinear least squares problem diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 21f2eb8f10..49df87614b 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -15,7 +15,6 @@ given as ``g: \mathcal M → ℝ^m`` with respect to a basis ``\mathcal B`` of ` This can be written as ``J_g(p) = (c_1^{\mathrm{T}},…,c_m^{\mathrm{T}})^{\mathrm{T}} \in ℝ^{m,d}``, that is, every row ``c_i`` of this matrix is a set of coefficients such that `get_coefficients(M, p, c, B)` is the tangent vector ``\oepratorname{grad} g_i(p)`` - for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ T_p\mathcal M``, ``i=1,…,m``. diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index a4af0cbfa4..7ea82c11c7 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -7,20 +7,18 @@ _doc_LM = """ LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1) LevenbergMarquardt!(M, f, jacobian_f, p, num_components=-1; kwargs...) -Solve an optimization problem of the form +compute the the Riemannian Levenberg-Marquardt algorithm [Peeters:1993, AdachiOkunoTakeda:2022](@cite) +to solve -$(_doc_LM_formula) +$(_problem(:NonLinearLeastSquares)) -where ``f: $(_math(:M)) → ℝ^d`` is a continuously differentiable function, -using the Riemannian Levenberg-Marquardt algorithm [Peeters:1993](@cite). -The implementation follows Algorithm 1 [AdachiOkunoTakeda:2022](@cite). The second signature performs the optimization in-place of `p`. # Input $(_var(:Argument, :M; type=true)) -* `f`: a cost function ``f: $(_math(:M))→ℝ^d`` -* `jacobian_f`: the Jacobian of ``f``. The Jacobian is supposed to accept a keyword argument +* `f`: a cost function ``f: $(_math(:M))→ℝ^d`` +* `grad_f`: the Jacobian of ``f``. 
The Jacobian is supposed to accept a keyword argument `basis_domain` which specifies basis of the tangent space at a given point in which the Jacobian is to be calculated. By default it should be the `DefaultOrthonormalBasis`. $(_var(:Argument, :p)) From 6fe69a8ab8e93438670ede11da3ec0db04ac8f4e Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 5 Oct 2024 17:14:05 +0200 Subject: [PATCH 02/34] fix a typo. --- src/documentation_glossary.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index da42da0fb9..5ac9d221b5 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -251,7 +251,7 @@ define!( :NonLinearLeastSquares, (; M="M", p="p") -> """ ```math -$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2))$(_tex(:sum))_{i=1}^n ρ_i$(_tex(:bigl))( $(_tex(:norm, "f_i($p)"))^2 $(_tex(:bigr))) +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2))$(_tex(:sum))_{i=1}^n ρ_i$(_tex(:bigl))( $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) ``` where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) \to ℝ``, From 37092ac9fcd5440eaf0268580acfaa3a29e9dde2 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sun, 6 Oct 2024 09:32:14 +0200 Subject: [PATCH 03/34] Fix a typo. 
--- src/documentation_glossary.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 5ac9d221b5..148cc678e8 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -254,7 +254,7 @@ define!( $(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2))$(_tex(:sum))_{i=1}^n ρ_i$(_tex(:bigl))( $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) ``` -where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) \to ℝ``, +where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, and each component function is continuously differentiable. The functions ``ρ_i: ℝ → ℝ`` can be seen as regularisers of the single least squares terms and are twice continuously differentiable. From 972d633ae613ca951485a2ace32c5acd55a61b92 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 7 Oct 2024 18:31:42 +0200 Subject: [PATCH 04/34] Start reworking the objective and its `get_cost`. 
--- src/documentation_glossary.jl | 16 ++-- src/plans/nonlinear_least_squares_plan.jl | 108 ++++++++++++++-------- src/plans/vectorial_plan.jl | 4 +- 3 files changed, 79 insertions(+), 49 deletions(-) diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 148cc678e8..d8d98f8a67 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -51,7 +51,7 @@ end # LaTeX define!(:LaTeX, :abs, (v) -> raw"\lvert " * "$v" * raw" \rvert") -define!(:LaTeX, :argmin, raw"\operatorname{arg\,min}") +define!(:LaTeX, :argmin, raw"\operatorname*{arg\,min}") define!(:LaTeX, :ast, raw"\ast") define!(:LaTeX, :bar, (letter) -> raw"\bar" * "$(letter)") define!(:LaTeX, :big, raw"\big") @@ -237,15 +237,11 @@ $(_tex(:text, "subject to"))$(_tex(:quad))&g_i($p) ≤ 0 \\quad $(_tex(:text, " ``` """, ) -define!( - :Problem, - :Default, - (; M="M", p="p") -> """ -```math -$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} f($p) -``` -""", -) +define!(:Problem, :Default, (; M="M", p="p") -> """ + ```math + $(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} f($p) + ``` + """) define!( :Problem, :NonLinearLeastSquares, diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 7f27bdd936..359219ee6f 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -1,6 +1,6 @@ @doc """ - NonlinearLeastSquaresObjective{T<:AbstractEvaluationType} <: AbstractManifoldObjective{T} + NonlinearLeastSquaresObjective{E<:AbstractEvaluationType} <: AbstractManifoldObjective{T} An objective to model the nonlinear least squares problem @@ -9,57 +9,89 @@ $(_problem(:NonLinearLeastSquares)) Specify a nonlinear least squares problem # Fields -* `f` a function ``f: $(_math(:M)) → ℝ^d`` to minimize -* `jacobian!!` Jacobian of the function ``f`` -* `jacobian_tangent_basis` the basis of tangent space used for computing the Jacobian. -* `num_components` number of values returned by `f` (equal to `d`). 
-Depending on the [`AbstractEvaluationType`](@ref) `T` the function ``F`` has to be provided: +* `objective`: aa [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` +* `smoothing`: a single [`ManifoldHessianObjective`](@ref) ``ρ`` or a [`VectorHessianFunction`](@ref)`{T}` containing one smoothing function ``ρ_i``, + as well as their first and second derivative(s) ``ρ'`` and ``ρ''``. -* as a functions `(M::AbstractManifold, p) -> v` that allocates memory for `v` itself for - an [`AllocatingEvaluation`](@ref), -* as a function `(M::AbstractManifold, v, p) -> v` that works in place of `v` for a - [`InplaceEvaluation`](@ref). +This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` +as the (inner) `objective. +The smoothing is expected to be the smoothing is expected to be [`AllocatingEvaluation`](@ref), +since it works on a one-dimensional vector space ``ℝ`` only anyways. -Also the Jacobian ``jacF!!`` is required: - -* as a functions `(M::AbstractManifold, p; basis_domain::AbstractBasis) -> v` that allocates - memory for `v` itself for an [`AllocatingEvaluation`](@ref), -* as a function `(M::AbstractManifold, v, p; basis_domain::AbstractBasis) -> v` that works - in place of `v` for an [`InplaceEvaluation`](@ref). 
# Constructors - NonlinearLeastSquaresProblem(M, F, jacF, num_components; evaluation=AllocatingEvaluation(), jacobian_tangent_basis=DefaultOrthonormalBasis()) + NonlinearLeastSquaresObjective(f_i, grad_f_i, ρ::F, ρ_prime::F, ρ_prime_prime::F) where {F<:Function} + NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction, ρ::Union{ManifoldHessianObjective, VectorHessianFunction}) # See also [`LevenbergMarquardt`](@ref), [`LevenbergMarquardtState`](@ref) """ -struct NonlinearLeastSquaresObjective{E<:AbstractEvaluationType,TC,TJ,TB<:AbstractBasis} <: - AbstractManifoldGradientObjective{E,TC,TJ} - f::TC - jacobian!!::TJ - jacobian_tangent_basis::TB - num_components::Int -end -function NonlinearLeastSquaresObjective( - f::TF, - jacobian_f::TJ, - num_components::Int; - evaluation::AbstractEvaluationType=AllocatingEvaluation(), - jacobian_tangent_basis::TB=DefaultOrthonormalBasis(), -) where {TF,TJ,TB<:AbstractBasis} - return NonlinearLeastSquaresObjective{typeof(evaluation),TF,TJ,TB}( - f, jacobian_f, jacobian_tangent_basis, num_components - ) +struct NonlinearLeastSquaresObjective{ + E<:AbstractEvaluationType, + F<:AbstractVectorGradientFunction{E}, + R<:Union{AbstractManifoldHessianObjective,VectorHessianFunction}, +} <: AbstractManifoldGradientObjective{E,F,F} + objective::F + smoothing::R end +# TODO: Write the ease-of-use constructor. + +# Cost +# (a) single ρ function get_cost( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p -) - return 1//2 * norm(nlso.f(M, p))^2 + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E, + AbstractVectorFunction{E,<:FunctionVectorialType}, + <:AbstractManifoldHessianObjective, + }, + p; + vector_space=Rn, +) where {E<:AbstractEvaluationType} + ρ = x -> get_cost(vector_space(1), nlso.smoothing, x) # Maybe introduce new type instead? 
+ return 1//2 * sum(x -> ρ(abs(x)^2), get_value(nlso.objective, p) .^ 2) end +function get_cost( + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E, + AbstractVectorFunction{E,<:ComponentVectorialType}, + <:AbstractManifoldHessianObjective, + }, + p; + vector_space=Rn, +) where {E<:AbstractEvaluationType} + ρ = x -> get_cost(vector_space(1), nlso.smoothing, x) # Maybe introduce new type instead? + v = ρ(abs(get_value(M, nlso.objective, p, 1))^2) + for i in 2:length(nlso.objective) + v += ρ(abs(get_value(M, nlso.objective, p, i))^2) + end + return 1//2 * v +end +# (b) ρ_i +function get_cost( + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},<:VectorHessianFunction + }, + p; + vector_space=Rn, +) where {E<:AbstractEvaluationType} + v = get_value( + vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, 1))^2, 1 + ) + for i in 2:length(nlso.objective) + v += get_value( + vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, i))^2, i + ) + end + return 1//2 * v +end + function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p ) @@ -68,6 +100,8 @@ function get_cost( return 1//2 * norm(residual_values)^2 end +# TODO: Continue here + """ get_gradient_from_Jacobian!( M::AbstractManifold, diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 49df87614b..c381bf613d 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -1,8 +1,8 @@ -@doc raw""" +@doc """ AbstractVectorialType An abstract type for different representations of a vectorial function - ``f: \mathcal M → \mathbb R^m`` and its (component-wise) gradient/Jacobian + ``f: $(_math(:M)) → ℝ`` and its (component-wise) gradient/Jacobian """ abstract type AbstractVectorialType end From 2e2e09729eeff41c11bd414568fcef504df28967 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Wed, 25 Dec 2024 16:08:39 +0100 Subject: [PATCH 
05/34] Start designing the new Jacobian interface. --- docs/src/plans/objective.md | 7 +- src/Manopt.jl | 2 + src/documentation_glossary.jl | 18 +++-- src/plans/nonlinear_least_squares_plan.jl | 66 ++---------------- src/plans/vectorial_plan.jl | 84 ++++++++++++++++------- 5 files changed, 84 insertions(+), 93 deletions(-) diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md index 48d10b2529..91d7a5954d 100644 --- a/docs/src/plans/objective.md +++ b/docs/src/plans/objective.md @@ -129,12 +129,6 @@ and internally get_gradient_function ``` -#### Internal helpers - -```@docs -get_gradient_from_Jacobian! -``` - ### Subgradient objective ```@docs @@ -262,6 +256,7 @@ Manopt.FunctionVectorialType #### Access functions ```@docs +Manopt.get_jacobian Manopt.get_value Manopt.get_value_function Base.length(::VectorGradientFunction) diff --git a/src/Manopt.jl b/src/Manopt.jl index ff2c9987a7..46d47a36a0 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -391,6 +391,8 @@ export get_proximal_map, get_proximal_map! export get_state, get_initial_stepsize, get_iterate, + get_jacobian, + get_jacobian!, get_gradients, get_gradients!, get_manifold, diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 851af77d30..5f45660691 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -71,6 +71,8 @@ define!( "\n" * raw"\end{cases}", ) +define!(:LaTeX, :cdots, raw"\cdots") +define!(:LaTeX, :ddots, raw"\ddots") define!(:LaTeX, :deriv, (t = "t") -> raw"\frac{\mathrm{d}}{\mathrm{d}" * "$(t)" * "}") define!(:LaTeX, :displaystyle, raw"\displaystyle") define!(:LaTeX, :frac, (a, b) -> raw"\frac" * "{$a}{$b}") @@ -82,13 +84,21 @@ define!(:LaTeX, :log, raw"\log") define!(:LaTeX, :max, raw"\max") define!(:LaTeX, :min, raw"\min") define!(:LaTeX, :norm, (v; index = "") -> raw"\lVert " * "$v" * raw" \rVert" * "_{$index}") +define!( + :LaTeX, + :pmatrix, + (lines...) 
-> raw"\begin{pmatrix} " * join(lines, raw"\\ ") * raw"\end{pmatrix}", +) define!(:LaTeX, :prox, raw"\operatorname{prox}") define!(:LaTeX, :quad, raw"\quad") define!(:LaTeX, :reflect, raw"\operatorname{refl}") define!(:LaTeX, :retr, raw"\operatorname{retr}") define!(:LaTeX, :subgrad, raw"∂") define!(:LaTeX, :sum, raw"\sum") +define!(:LaTeX, :transp, (letter) -> raw"\text{" * "$letter" * "}") define!(:LaTeX, :text, (letter) -> raw"\text{" * "$letter" * "}") +define!(:LaTeX, :transp, raw"\mathrm{T}") +define!(:LaTeX, :vdots, raw"\vdots") define!(:LaTeX, :vert, raw"\vert") define!(:LaTeX, :widehat, (letter) -> raw"\widehat{" * "$letter" * "}") _tex(args...; kwargs...) = glossary(:LaTeX, args...; kwargs...) @@ -258,15 +268,13 @@ define!( :NonLinearLeastSquares, (; M="M", p="p") -> """ ```math -$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2))$(_tex(:sum))_{i=1}^n ρ_i$(_tex(:bigl))( $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) ρ$(_tex(:bigl))( $(_tex(:sum))_{i=1}^n $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) ``` where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, and each component function is continuously differentiable. -The functions ``ρ_i: ℝ → ℝ`` can be seen as regularisers of the single least squares terms -and are twice continuously differentiable. - -For the case ``ρ_i(x) = x`` this yields the Nonlinear Least Squares objective +The function ``ρ: ℝ → ℝ`` can be seen as smoothing or regularisation of the least squares term. +It is assumed to be twice continuously differentiable and its default is ``ρ(x) = x``. 
""", ) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 359219ee6f..5fe29a3b72 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -10,19 +10,17 @@ Specify a nonlinear least squares problem # Fields -* `objective`: aa [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` -* `smoothing`: a single [`ManifoldHessianObjective`](@ref) ``ρ`` or a [`VectorHessianFunction`](@ref)`{T}` containing one smoothing function ``ρ_i``, - as well as their first and second derivative(s) ``ρ'`` and ``ρ''``. +* `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` +* `smoothing`: a [`ManifoldHessianObjective`](@ref) of a smoothing function ``ρ: ℝ → ℝ``, hence including its first and second derivatives ``ρ'`` and ``ρ''``. This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` as the (inner) `objective. The smoothing is expected to be the smoothing is expected to be [`AllocatingEvaluation`](@ref), since it works on a one-dimensional vector space ``ℝ`` only anyways. - # Constructors - NonlinearLeastSquaresObjective(f_i, grad_f_i, ρ::F, ρ_prime::F, ρ_prime_prime::F) where {F<:Function} + NonlinearLeastSquaresObjective(f_i, grad_f_i, ρ::F, ρ_prime::G, ρ_prime_prime::H) where {F<:Function} NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction, ρ::Union{ManifoldHessianObjective, VectorHessianFunction}) # See also @@ -41,7 +39,7 @@ end # TODO: Write the ease-of-use constructor. 
# Cost -# (a) single ρ +# (a) with ρ function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ @@ -52,46 +50,9 @@ function get_cost( p; vector_space=Rn, ) where {E<:AbstractEvaluationType} - ρ = x -> get_cost(vector_space(1), nlso.smoothing, x) # Maybe introduce new type instead? - return 1//2 * sum(x -> ρ(abs(x)^2), get_value(nlso.objective, p) .^ 2) -end -function get_cost( - M::AbstractManifold, - nlso::NonlinearLeastSquaresObjective{ - E, - AbstractVectorFunction{E,<:ComponentVectorialType}, - <:AbstractManifoldHessianObjective, - }, - p; - vector_space=Rn, -) where {E<:AbstractEvaluationType} - ρ = x -> get_cost(vector_space(1), nlso.smoothing, x) # Maybe introduce new type instead? - v = ρ(abs(get_value(M, nlso.objective, p, 1))^2) - for i in 2:length(nlso.objective) - v += ρ(abs(get_value(M, nlso.objective, p, i))^2) - end - return 1//2 * v + return get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p)^2) end -# (b) ρ_i -function get_cost( - M::AbstractManifold, - nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:ComponentVectorialType},<:VectorHessianFunction - }, - p; - vector_space=Rn, -) where {E<:AbstractEvaluationType} - v = get_value( - vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, 1))^2, 1 - ) - for i in 2:length(nlso.objective) - v += get_value( - vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, i))^2, i - ) - end - return 1//2 * v -end - +# (b) without ρ function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p ) @@ -100,20 +61,7 @@ function get_cost( return 1//2 * norm(residual_values)^2 end -# TODO: Continue here - -""" - get_gradient_from_Jacobian!( - M::AbstractManifold, - X, - nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, - p, - Jval=zeros(nlso.num_components, manifold_dimension(M)), - ) - -Compute gradient of [`NonlinearLeastSquaresObjective`](@ref) `nlso` at point `p` in place of -`X`, with 
temporary Jacobian stored in the optional argument `Jval`. -""" +# TODO: Replace once we have the Jacobian implemented function get_gradient_from_Jacobian!( M::AbstractManifold, X, diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index c381bf613d..9fd4e3a6c1 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -68,9 +68,10 @@ format ``f`` is implemented as. There are three different representations of ``f``, which might be beneficial in one or the other situation: -* the [`FunctionVectorialType`](@ref), -* the [`ComponentVectorialType`](@ref), -* the [`CoordinateVectorialType`](@ref) with respect to a specific basis of the tangent space. +* the [`FunctionVectorialType`](@ref) storing a single function ``f`` that returns a vector, +* the [`ComponentVectorialType`](@ref) storing a vector of functions ``f_i`` that return a single value each, +* the [`CoordinateVectorialType`](@ref) storing functions with respect to a specific basis of the tangent space for gradients and Hessians. + Gradients of this type are usually referred to as Jacobians. For the [`ComponentVectorialType`](@ref) imagine that ``f`` could also be written using its component functions, @@ -89,9 +90,6 @@ For the [`FunctionVectorialType`](@ref) ``f`` is implemented as a single functi `f(M, p)`, that returns an `AbstractArray`. And advantage here is, that this is a single function. A disadvantage might be, that if this is expensive even to compute a single component, all of `f` has to be evaluated - -For the [`ComponentVectorialType`](@ref) of `f`, each of the component functions -is a (classical) objective. 
""" abstract type AbstractVectorFunction{E<:AbstractEvaluationType,FT<:AbstractVectorialType} <: Function end @@ -109,29 +107,29 @@ abstract type AbstractVectorGradientFunction{ E<:AbstractEvaluationType,FT<:AbstractVectorialType,JT<:AbstractVectorialType } <: AbstractVectorFunction{E,FT} end -@doc raw""" +@doc """ VectorGradientFunction{E, FT, JT, F, J, I} <: AbstractVectorGradientFunction{E, FT, JT} -Represent a function ``f:\mathcal M → ℝ^n`` including it first derivative, +Represent a function ``f:$(_math(:M)) → ℝ^n`` including it first derivative, either as a vector of gradients of a Jacobian -And hence has a gradient ``\oepratorname{grad} f_i(p) ∈ T_p\mathcal M``. +And hence has a gradient ``$(_tex(:grad)) f_i(p) ∈ $(_math(:TpM))`. Putting these gradients into a vector the same way as the functions, yields a [`ComponentVectorialType`](@ref) ```math -\operatorname{grad} f(p) = \Bigl( \operatorname{grad} f_1(p), \operatorname{grad} f_2(p), …, \operatorname{grad} f_n(p) \Bigr)^{\mathrm{T}} -∈ (T_p\mathcal M)^n +$(_tex(:grad)) f(p) = $(_tex(:Bigl))( $(_tex(:grad)) f_1(p), $(_tex(:grad)) f_2(p), …, $(_tex(:grad)) f_n(p) $(_tex(:Bigr)))^$(_tex(:transp)) +∈ ($(_math(:TpM)))^n ``` And advantage here is, that again the single components can be evaluated individually # Fields -* `value!!`: the cost function ``f``, which can take different formats -* `cost_type`: indicating / string data for the type of `f` -* `jacobian!!`: the Jacobian of ``f`` -* `jacobian_type`: indicating / storing data for the type of ``J_f`` +* `value!!::F`: the cost function ``f``, which can take different formats +* `cost_type::`[`AbstractVectorialType`](@ref): indicating / string data for the type of `f` +* `jacobian!!::G`: the Jacobian of ``f`` +* `jacobian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``J_f`` * `parameters`: the number `n` from, the size of the vector ``f`` returns. 
# Constructor @@ -195,24 +193,24 @@ or a single tangent space of the power manifold of lenth `n`. # Fields -* `value!!`: the cost function ``f``, which can take different formats -* `cost_type`: indicating / string data for the type of `f` -* `jacobian!!`: the Jacobian of ``f`` -* `jacobian_type`: indicating / storing data for the type of ``J_f`` -* `hessians!!`: the Hessians of ``f`` (in a component wise sense) -* `hessian_type`: indicating / storing data for the type of ``H_f`` +* `value!!::F`: the cost function ``f``, which can take different formats +* `cost_type::`[`AbstractVectorialType`](@ref): indicating / string data for the type of `f` +* `jacobian!!::G`: the Jacobian of ``f`` +* `jacobian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``J_f`` +* `hessians!!::H`: the Hessians of ``f`` (in a component wise sense) +* `hessian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``H_f`` * `parameters`: the number `n` from, the size of the vector ``f`` returns. # Constructor - VectorGradientFunction(f, Jf, Hess_f, range_dimension; + VectorHessianFunction(f, Jf, Hess_f, range_dimension; evaluation::AbstractEvaluationType=AllocatingEvaluation(), function_type::AbstractVectorialType=FunctionVectorialType(), jacobian_type::AbstractVectorialType=FunctionVectorialType(), hessian_type::AbstractVectorialType=FunctionVectorialType(), ) -Create a `VectorGradientFunction` of `f` and its Jacobian (vector of gradients) `Jf` +Create a `VectorHessianFunction` of `f` and its Jacobian (vector of gradients) `Jf` and (vector of) Hessians, where `f` maps into the Euclidean space of dimension `range_dimension`. Their types are specified by the `function_type`, and `jacobian_type`, and `hessian_type`, respectively. The Jacobian and Hessian can further be given as an allocating variant or an @@ -817,6 +815,46 @@ end get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.hessians!! 
+get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultBasis()
+function get_jacobian_basis(
+    vgf::AbstractVectorGradientFunction{F,G,<:CoordinateVectorialType}
+) where {F,G}
+    return vgf.jacobian_type.basis
+end
+
+_doc_get_jacobian = """
+    get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; basis = get_jacobian_basis(vgf) )
+    get_jacobian!(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; basis = get_jacobian_basis(vgf) )
+
+Evaluate the Jacobian of the vector function `vgf` on the manifold `M` at `p`.
+Let ``n`` be the manifold dimension of `M`.
+Representing every gradient ``$(_tex(:grad)) f_i(p) ∈ $(_math(:TpM))`` as a vector of coefficients
+``c_i = (c_{i,j})_{j=0}^n`` with respect to some basis, the Jacobian is given by
+
+```math
+  J_f = $(_tex(:pmatrix,
+  "c_{1,1} & $(_tex(:cdots)) & c_{1,n}",
+  "$(_tex(:vdots)) & $(_tex(:ddots)) & $(_tex(:vdots))",
+  "c_{n,1} & $(_tex(:cdots)) & c_{n,n}"
+  )) ∈ ℝ^{n×n},
+```
+
+in other words, the Jacobian consists of the gradients in coordinates stored row-wise.
+
+For the [`ComponentVectorialType`](@ref) and the [`FunctionVectorialType`](@ref),
+this is done in the tangent space basis provided by `basis=` directly.
+For the [`CoordinateVectorialType`](@ref) the Jacobian is computed in the basis of this type,
+if the basis does not agree with the `basis=`, a change of basis is performed.
+"""
+
+function get_jacobian end
+@doc "$(_doc_get_jacobian)"
+get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p)
+
+function get_jacobian! end
+
+# TODO: Implement a get_jacobian function that works with respect to a certain basis
+
 @doc raw"""
     length(vgf::AbstractVectorFunction)

From 1c08d95f77a21e39fe4dc51f79d6381e820b0079 Mon Sep 17 00:00:00 2001
From: Ronny Bergmann
Date: Thu, 26 Dec 2024 09:31:18 +0100
Subject: [PATCH 06/34] start documenting the new model for LM and fixing its
 structure.
--- Changelog.md | 10 ++--- docs/src/solvers/LevenbergMarquardt.md | 6 +++ src/plans/nonlinear_least_squares_plan.jl | 22 ++++++++++ src/solvers/LevenbergMarquardt.jl | 53 ++++++++++++++++++----- 4 files changed, 74 insertions(+), 17 deletions(-) diff --git a/Changelog.md b/Changelog.md index 6051cc0135..d61778b56b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -23,7 +23,7 @@ a Jacobian, that requires bases for tangent spaces ### Added * An automated detection whether the tutorials are present - if not an also no quarto run is done, an automated `--exlcude-tutorials` option is added. + if not an also no quarto run is done, an automated `--exclude-tutorials` option is added. * Support for ManifoldDiff 0.4 * icons upfront external links when they link to another package or wikipedia. @@ -35,7 +35,7 @@ a Jacobian, that requires bases for tangent spaces ### Changed -* stabilize `max_Stepzise` to also work when `injectivity_radius` dos not exist. +* stabilize `max_stepsize` to also work when `injectivity_radius` dos not exist. It however would warn new users, that activate tutorial mode. * Start a `ManoptTestSuite` subpackage to store dummy types and common test helpers in. @@ -74,8 +74,8 @@ In general we introduce a few factories, that avoid having to pass the manifold ### Changed -* Any `Stepsize` now hase a `Stepsize` struct used internally as the original `struct`s before. The newly exported terms aim to fit `stepsize=...` in naming and create a `ManifoldDefaultsFactory` instead, so that any stepsize can be created without explicitly specifying the manifold. - * `ConstantStepsize` is no longer exported, use `ConstantLength` instead. The length parameter is now a positional argument following the (optonal) manifold. Besides that `ConstantLength` works as before,just that omitting the manifold fills the one specified in the solver now. +* Any `Stepsize` now has a `Stepsize` struct used internally as the original `struct`s before. 
The newly exported terms aim to fit `stepsize=...` in naming and create a `ManifoldDefaultsFactory` instead, so that any stepsize can be created without explicitly specifying the manifold. + * `ConstantStepsize` is no longer exported, use `ConstantLength` instead. The length parameter is now a positional argument following the (optional) manifold. Besides that `ConstantLength` works as before,just that omitting the manifold fills the one specified in the solver now. * `DecreasingStepsize` is no longer exported, use `DecreasingLength` instead. `ConstantLength` works as before,just that omitting the manifold fills the one specified in the solver now. * `ArmijoLinesearch` is now called `ArmijoLinesearchStepsize`. `ArmijoLinesearch` works as before,just that omitting the manifold fills the one specified in the solver now. * `WolfePowellLinesearch` is now called `WolfePowellLinesearchStepsize`, its constant `c_1` is now unified with Armijo and called `sufficient_decrease`, `c_2` was renamed to `sufficient_curvature`. Besides that, `WolfePowellLinesearch` works as before, just that omitting the manifold fills the one specified in the solver now. 
@@ -137,7 +137,7 @@ In general we introduce a few factories, that avoid having to pass the manifold * `ExactPenaltyMethodState(M, sub_problem; evaluation=...)` was added and `ExactPenaltyMethodState(M, sub_problem, sub_state; evaluation=...)` now has `p=rand(M)` as keyword argument instead of being the second positional one * `DifferenceOfConvexState(M, sub_problem; evaluation=...)` was added and `DifferenceOfConvexState(M, sub_problem, sub_state; evaluation=...)` now has `p=rand(M)` as keyword argument instead of being the second positional one * `DifferenceOfConvexProximalState(M, sub_problem; evaluation=...)` was added and `DifferenceOfConvexProximalState(M, sub_problem, sub_state; evaluation=...)` now has `p=rand(M)` as keyword argument instead of being the second positional one - * bumped `Manifolds.jl`to version 0.10; this mainly means that any algorithm working on a productmanifold and requiring `ArrayPartition` now has to explicitly do `using RecursiveArrayTools`. + * bumped `Manifolds.jl`to version 0.10; this mainly means that any algorithm working on a product manifold and requiring `ArrayPartition` now has to explicitly do `using RecursiveArrayTools`. ### Fixed * the `AverageGradientRule` filled its internal vector of gradients wrongly – or mixed it up in parallel transport. This is now fixed. diff --git a/docs/src/solvers/LevenbergMarquardt.md b/docs/src/solvers/LevenbergMarquardt.md index dfb7295edb..112bc6d08d 100644 --- a/docs/src/solvers/LevenbergMarquardt.md +++ b/docs/src/solvers/LevenbergMarquardt.md @@ -15,6 +15,12 @@ LevenbergMarquardt! 
LevenbergMarquardtState ``` +## Available smoothing functions + +```@docs +Manopt.smoothing_factory +``` + ## [Technical details](@id sec-lm-technical-details) The [`LevenbergMarquardt`](@ref) solver requires the following functions of a manifold to be available diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 5fe29a3b72..88048fc4af 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -242,6 +242,28 @@ mutable struct LevenbergMarquardtState{ end end +""" + smoothing_factory(s::Union{Symbol, ManifoldHessianObjective}=:Identity, evaluation=AllocatingEvaluation()) + +Create a smoothing function from a symbol `s`. +If the smoothing is already a [`ManifoldHessianObjective`](@ref), this is returned unchanged. +All generated objectives are [`AllocatingEvaluation`](@ref). + +# Currently available smoothing functions + +| `Symbol` | ``ρ(s)`` | ``ρ'(s)`` | ``ρ''(s)[X]`` | Comment | +| -------- | ----- | ------ | ------- | ------- | +| `:Identity` | ``s`` | ``1`` | ``0`` | No smoothing, the default | +""" +function smoothing_factory(s) end + +smoothing_factory() = smoothing_factory(:Identity) +smoothing_factory(s::ManifoldHessianObjective) = s +smoothing_factory(s::Symbol) = smoothing_factory(Val(s)) +function smoothing_factory(::Val{:Identity}) + return ManifoldHessianObjective((E, x) -> x, (E, x) -> one(x), (E, x, X) -> zero(X)) +end + function show(io::IO, lms::LevenbergMarquardtState) i = get_count(lms, :Iterations) Iter = (i > 0) ? "After $i iterations\n" : "" diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 7ea82c11c7..3bdfd927c7 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -4,8 +4,12 @@ _doc_LM_formula = raw""" ``` """ _doc_LM = """ - LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1) + LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1; kwargs...)
+ LevenbergMarquardt(M, vgf, p; kwargs...) + LevenbergMarquardt(M, nlso, p; kwargs...) LevenbergMarquardt!(M, f, jacobian_f, p, num_components=-1; kwargs...) + LevenbergMarquardt!(M, vgf, p, num_components=-1; kwargs...) + LevenbergMarquardt!(M, nlso, p, num_components=-1; kwargs...) compute the the Riemannian Levenberg-Marquardt algorithm [Peeters:1993, AdachiOkunoTakeda:2022](@cite) to solve @@ -17,18 +21,27 @@ The second signature performs the optimization in-place of `p`. # Input $(_var(:Argument, :M; type=true)) -* `f`: a cost function ``f: $(_math(:M))→ℝ^d`` -* `grad_f`: the Jacobian of ``f``. The Jacobian is supposed to accept a keyword argument - `basis_domain` which specifies basis of the tangent space at a given point in which the - Jacobian is to be calculated. By default it should be the `DefaultOrthonormalBasis`. +* `f`: a cost function ``f: $(_math(:M))→ℝ^d``. + The cost function can be provided in two different ways + * as a single function returning a vector ``f(p)) ∈ ℝ^d`` + * as a vector of functions, where each single function returns a scalar ``f_i(p) ∈ ℝ`` + The type is determined by the `function_type=` keyword argument. +* `jacobian_f`: the Jacobian of ``f``. + The Jacobian can be provided in three different ways + * as a single function returning a vector of gradient vectors ``$(_tex(:grad)) f_i(p)`` + * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)`` + * as a single function returning a (coefficient) matrix with respect to an [`AbstractBasis`](@extref AbstractBasis) + of the trangent space at `p`. + The type is determined by the `jacobian_type=` keyword argument. $(_var(:Argument, :p)) * `num_components`: length of the vector returned by the cost function (`d`). By default its value is -1 which means that it is determined automatically by - calling `f` one additional time. This is only possible when `evaluation` is `AllocatingEvaluation`, + calling `f` one additional time. 
This is only possible when `evaluation` is [`AllocatingEvaluation`](@ref), for mutating evaluation this value must be explicitly specified. -These can also be passed as a [`NonlinearLeastSquaresObjective`](@ref), -then the keyword `jacobian_tangent_basis` below is ignored +You can also provide the cost and its Jacobian already as a [`VectorGradientFunction`](@ref) `vgf`. +Alternatively, passing a [`NonlinearLeastSquaresObjective`](@ref) `nlso`, +then both the keyword `jacobian_tangent_basis` and the `smoothing` are ignored. # Keyword arguments @@ -38,11 +51,13 @@ $(_var(:Keyword, :evaluation)) residual (objective) at minimum is equal to 0. * `damping_term_min=0.1`: initial (and also minimal) value of the damping term * `β=5.0`: parameter by which the damping term is multiplied when the current new point is rejected +* `function_type=`[`FunctionVectorialType`](@ref): an [`AbstractVectorialType`](@ref) specifying the type of cost function provided. * `initial_jacobian_f`: the initial Jacobian of the cost function `f`. By default this is a matrix of size `num_components` times the manifold dimension of similar type as `p`. * `initial_residual_values`: the initial residual vector of the cost function `f`. By default this is a vector of length `num_components` of similar type as `p`. -* `jacobian_tangent_basis`: an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) specify the basis of the tangent space for `jacobian_f`. +* `jacobian_type=`[`FunctionVectorialType`](@ref): an [`AbstractVectorialType`](@ref) specifying the type of Jacobian provided. +* `smoothing`: specify the function ``ρ`` as a [`ManifoldHessianObjective`](@ref) object.
$(_var(:Keyword, :retraction_method)) $(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(200)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-12)`")) @@ -72,7 +87,8 @@ function LevenbergMarquardt( p, num_components::Int=-1; evaluation::AbstractEvaluationType=AllocatingEvaluation(), - jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), + function_type::AbstractVectorialType=FunctionVectorialType(), + jacobian_type::AbstractVectorialType=FunctionVectorialType(), kwargs..., ) if num_components == -1 @@ -86,13 +102,26 @@ function LevenbergMarquardt( ) end end - nlso = NonlinearLeastSquaresObjective( + vgf = VectorGradientFunction( f, jacobian_f, num_components; evaluation=evaluation, - jacobian_tangent_basis=jacobian_tangent_basis, + function_type=function_type, + jacobian_type=jacobian_type, ) + return LevenbergMarquardt(M, vgf, p; evaluation=evaluation, kwargs...) +end +function LevenbergMarquardt( + M::AbstractManifold, + vgf::VectorGradientFunction, + p; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + smoothing=:Identity, + kwargs..., +) + _smoothing = smoothing_factory(smoothing) + nlso = NonlinearLeastSquaresObjective(vgf, _smoothing; evaluation=evaluation) return LevenbergMarquardt(M, nlso, p; evaluation=evaluation, kwargs...) end function LevenbergMarquardt( From 7e260779e9d0b2a61639e9f03b9f59b3d44c24b9 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 26 Dec 2024 14:05:23 +0100 Subject: [PATCH 07/34] Fix a few typos.
--- docs/src/solvers/ChambollePock.md | 2 +- docs/src/solvers/DouglasRachford.md | 2 +- docs/src/solvers/LevenbergMarquardt.md | 2 +- docs/src/solvers/augmented_Lagrangian_method.md | 2 +- docs/src/solvers/difference_of_convex.md | 2 +- docs/src/solvers/exact_penalty_method.md | 2 +- docs/src/solvers/particle_swarm.md | 2 +- docs/src/solvers/primal_dual_semismooth_Newton.md | 2 +- docs/src/solvers/truncated_conjugate_gradient_descent.md | 2 +- docs/src/solvers/trust_regions.md | 2 +- src/plans/nonlinear_least_squares_plan.jl | 2 +- src/solvers/LevenbergMarquardt.jl | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/src/solvers/ChambollePock.md b/docs/src/solvers/ChambollePock.md index 7c1af4e431..71816e9766 100644 --- a/docs/src/solvers/ChambollePock.md +++ b/docs/src/solvers/ChambollePock.md @@ -116,7 +116,7 @@ The [`ChambollePock`](@ref) solver requires the following functions of a manifol * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` or `retraction_method_dual=` (for ``\mathcal N``) does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` or `inverse_retraction_method_dual=` (for ``\mathcal N``) does not have to be specified. * A [`vector_transport_to!`](@extref ManifoldsBase :doc:`vector_transports`)`M, Y, p, X, q)`; it is recommended to set the [`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`) to a favourite retraction. 
If this default is set, a `vector_transport_method=` or `vector_transport_method_dual=` (for ``\mathcal N``) does not have to be specified. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. ## Literature diff --git a/docs/src/solvers/DouglasRachford.md b/docs/src/solvers/DouglasRachford.md index f7a04ccff0..165f767472 100644 --- a/docs/src/solvers/DouglasRachford.md +++ b/docs/src/solvers/DouglasRachford.md @@ -74,7 +74,7 @@ The [`DouglasRachford`](@ref) solver requires the following functions of a manif * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` does not have to be specified. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. 
By default, one of the stopping criteria is [`StopWhenChangeLess`](@ref), which requires diff --git a/docs/src/solvers/LevenbergMarquardt.md b/docs/src/solvers/LevenbergMarquardt.md index 112bc6d08d..ea998307ed 100644 --- a/docs/src/solvers/LevenbergMarquardt.md +++ b/docs/src/solvers/LevenbergMarquardt.md @@ -27,7 +27,7 @@ The [`LevenbergMarquardt`](@ref) solver requires the following functions of a ma * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. ## Literature diff --git a/docs/src/solvers/augmented_Lagrangian_method.md b/docs/src/solvers/augmented_Lagrangian_method.md index 1a830f3a6c..1dfb306694 100644 --- a/docs/src/solvers/augmented_Lagrangian_method.md +++ b/docs/src/solvers/augmented_Lagrangian_method.md @@ -26,7 +26,7 @@ AugmentedLagrangianGrad The [`augmented_Lagrangian_method`](@ref) solver requires the following functions of a manifold to be available -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. 
+* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * Everything the subsolver requires, which by default is the [`quasi_Newton`](@ref) method * A [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. diff --git a/docs/src/solvers/difference_of_convex.md b/docs/src/solvers/difference_of_convex.md index 33082d90b1..37e9a472d1 100644 --- a/docs/src/solvers/difference_of_convex.md +++ b/docs/src/solvers/difference_of_convex.md @@ -67,7 +67,7 @@ which either requires * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` or `retraction_method_dual=` (for ``\mathcal N``) does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` or `inverse_retraction_method_dual=` (for ``\mathcal N``) does not have to be specified or the [`distance`](@extref `ManifoldsBase.distance-Tuple{AbstractManifold, Any, Any}`)`(M, p, q)` for said default inverse retraction. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. 
* By default the tangent vector storing the gradient is initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. * everything the subsolver requires, which by default is the [`trust_regions`](@ref) or if you do not provide a Hessian [`gradient_descent`](@ref). diff --git a/docs/src/solvers/exact_penalty_method.md b/docs/src/solvers/exact_penalty_method.md index 7237fe4236..4bb3490664 100644 --- a/docs/src/solvers/exact_penalty_method.md +++ b/docs/src/solvers/exact_penalty_method.md @@ -30,7 +30,7 @@ LogarithmicSumOfExponentials The [`exact_penalty_method`](@ref) solver requires the following functions of a manifold to be available -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * Everything the subsolver requires, which by default is the [`quasi_Newton`](@ref) method * A [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. diff --git a/docs/src/solvers/particle_swarm.md b/docs/src/solvers/particle_swarm.md index c804cfe5e4..91b80f9098 100644 --- a/docs/src/solvers/particle_swarm.md +++ b/docs/src/solvers/particle_swarm.md @@ -30,7 +30,7 @@ The [`particle_swarm`](@ref) solver requires the following functions of a manifo * A [`vector_transport_to!`](@extref ManifoldsBase :doc:`vector_transports`)`M, Y, p, X, q)`; it is recommended to set the [`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `vector_transport_method=` does not have to be specified. 
* By default the stopping criterion uses the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. * Tangent vectors storing the social and cognitive vectors are initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * The [`distance`](@extref `ManifoldsBase.distance-Tuple{AbstractManifold, Any, Any}`)`(M, p, q)` when using the default stopping criterion, which uses [`StopWhenChangeLess`](@ref). ## Literature diff --git a/docs/src/solvers/primal_dual_semismooth_Newton.md b/docs/src/solvers/primal_dual_semismooth_Newton.md index e2e0607f79..d570816600 100644 --- a/docs/src/solvers/primal_dual_semismooth_Newton.md +++ b/docs/src/solvers/primal_dual_semismooth_Newton.md @@ -78,7 +78,7 @@ The [`primal_dual_semismooth_Newton`](@ref) solver requires the following functi * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * An [`inverse_retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, X, p, q)`; it is recommended to set the [`default_inverse_retraction_method`](@extref `ManifoldsBase.default_inverse_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `inverse_retraction_method=` does not have to be specified. 
* A [`vector_transport_to!`](@extref ManifoldsBase :doc:`vector_transports`)`M, Y, p, X, q)`; it is recommended to set the [`default_vector_transport_method`](@extref `ManifoldsBase.default_vector_transport_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `vector_transport_method=` does not have to be specified. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * A [`get_basis`](@extref `ManifoldsBase.get_basis-Tuple{AbstractManifold, Any, ManifoldsBase.AbstractBasis}`) for the [`DefaultOrthonormalBasis`](@extref `ManifoldsBase.DefaultOrthonormalBasis`) on ``\mathcal M`` * [`exp`](@extref `Base.exp-Tuple{AbstractManifold, Any, Any}`) and [`log`](@extref `Base.log-Tuple{AbstractManifold, Any, Any}`) (on ``\mathcal M``) * A [`DiagonalizingOrthonormalBasis`](@extref `ManifoldsBase.DiagonalizingOrthonormalBasis`) to compute the differentials of the exponential and logarithmic map diff --git a/docs/src/solvers/truncated_conjugate_gradient_descent.md b/docs/src/solvers/truncated_conjugate_gradient_descent.md index aabcacd116..1a331d7cac 100644 --- a/docs/src/solvers/truncated_conjugate_gradient_descent.md +++ b/docs/src/solvers/truncated_conjugate_gradient_descent.md @@ -51,7 +51,7 @@ The [`trust_regions`](@ref) solver requires the following functions of a manifol * if you do not provide a `trust_region_radius=`, then [`injectivity_radius`](@extref `ManifoldsBase.injectivity_radius-Tuple{AbstractManifold}`) on the manifold `M` is required. * the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. 
* A [`zero_vector!`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,X,p)`. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. ## Literature diff --git a/docs/src/solvers/trust_regions.md b/docs/src/solvers/trust_regions.md index 14548e373e..755b04a443 100644 --- a/docs/src/solvers/trust_regions.md +++ b/docs/src/solvers/trust_regions.md @@ -66,7 +66,7 @@ The [`trust_regions`](@ref) solver requires the following functions of a manifol * A [`retract!`](@extref ManifoldsBase :doc:`retractions`)`(M, q, p, X)`; it is recommended to set the [`default_retraction_method`](@extref `ManifoldsBase.default_retraction_method-Tuple{AbstractManifold}`) to a favourite retraction. If this default is set, a `retraction_method=` does not have to be specified. * By default the stopping criterion uses the [`norm`](@extref `LinearAlgebra.norm-Tuple{AbstractManifold, Any, Any}`) as well, to stop when the norm of the gradient is small, but if you implemented `inner`, the norm is provided already. * if you do not provide an initial `max_trust_region_radius`, a [`manifold_dimension`](@extref `ManifoldsBase.manifold_dimension-Tuple{AbstractManifold}`) is required. -* A [`copyto!](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. +* A [`copyto!`](@extref `Base.copyto!-Tuple{AbstractManifold, Any, Any}`)`(M, q, p)` and [`copy`](@extref `Base.copy-Tuple{AbstractManifold, Any}`)`(M,p)` for points. * By default the tangent vectors are initialized calling [`zero_vector`](@extref `ManifoldsBase.zero_vector-Tuple{AbstractManifold, Any}`)`(M,p)`. 
diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 88048fc4af..57f220f25b 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -252,7 +252,7 @@ All generated objectives are [`AllocatingEvaluation`](@ref). # Currently available smoothing functions | `Symbol` | ``ρ(s)`` | ``ρ'(s)`` | ``ρ''(s)[X]`` | Comment | -| -------- | ----- | ------ | ------- | ------- | +|:-------- |:-----:|:------:|:-------:|:------- | | `:Identity` | ``s`` | ``1`` | ``0`` | No smoothing, the default | """ function smoothing_factory(s) end diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 3bdfd927c7..615f6b4ac8 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -30,7 +30,7 @@ $(_var(:Argument, :M; type=true)) The Jacobian can be provided in three different ways * as a single function returning a vector of gradient vectors ``$(_tex(:grad)) f_i(p)`` * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)`` - * as a single function returning a (coefficient) matrix with respect to an [`AbstractBasis`](@extref AbstractBasis) + * as a single function returning a (coefficient) matrix with respect to an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) of the trangent space at `p`. The type is determined by the `jacobian_type=` keyword argument. $(_var(:Argument, :p)) From b06dd2fbf436165eb5ece2ab041ba284c9a92ffc Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 26 Dec 2024 17:30:54 +0100 Subject: [PATCH 08/34] add scaling to the factory. 
--- src/plans/nonlinear_least_squares_plan.jl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 57f220f25b..d3d4aafc4e 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -243,7 +243,9 @@ mutable struct LevenbergMarquardtState{ end """ - smoothing_factory(s::Union{Symbol, ManifoldHessianObjective}=:Identity, evaluation=AllocatingEvaluation()) + smoothing_factory(s::Symbol) + smoothing_factory((s,α)::Tuple{Symbol,<:Real}) + smoothing_factory(o::ManifoldHessianObjective) Create a smoothing function from a symbol `s`. If the smoothing is already a [`ManifoldHessianObjective`](@ref), this is returned unchanged. @@ -254,6 +256,11 @@ All generated objectives are [`AllocatingEvaluation`](@ref). | `Symbol` | ``ρ(s)`` | ``ρ'(s)`` | ``ρ''(s)[X]`` | Comment | |:-------- |:-----:|:------:|:-------:|:------- | | `:Identity` | ``s`` | ``1`` | ``0`` | No smoothing, the default | + +As well as a scaled variant for any ``ρ`` as ``ρ_α(s) = α^2 ρ$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))``. +which yields ``ρ_α'(s) = ρ'$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))`` and ``ρ_α''(s)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))ρ''$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))[X]``. + +Scaling is activated by calling `smoothing_factory((symbol, α))` or `smoothing_factory((o, α))`. 
""" function smoothing_factory(s) end @@ -263,6 +270,17 @@ smoothing_factory(s::Symbol) = smoothing_factory(Val(s)) function smoothing_factory(::Val{:Identity}) return ManifoldHessianObjective((E, x) -> x, (E, x) -> one(x), (E, x, X) -> zero(X)) end +function smoothing_factory((s, α)::Tuple{Symbol,<:Real}) + o = smoothing_factory(s) + return smoothing_factory((o, α)) +end +function smoothing_factory((o, α)::Tuple{ManifoldHessianObjective,<:Real}) + return ManifoldHessianObjective( + (E, x) -> α^2 * get_cost(E, o, x / α^2), + (E, x) -> get_gradient(E, o, x / α^2), + (E, x, X) -> get_hessian(E, o, x / α^2, X) / α^2, + ) +end function show(io::IO, lms::LevenbergMarquardtState) i = get_count(lms, :Iterations) From f18a8a314666dfb637c6d0831f5393b4e5f2cbf5 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 26 Dec 2024 17:46:32 +0100 Subject: [PATCH 09/34] Sketch roadmap. --- src/plans/nonlinear_least_squares_plan.jl | 22 ++++++++++++++++++ src/solvers/LevenbergMarquardt.jl | 27 +++++------------------ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index d3d4aafc4e..29368e420a 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -61,6 +61,26 @@ function get_cost( return 1//2 * norm(residual_values)^2 end + +function get_jacobian!( + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, + J, + p, + basis_domain::AbstractBasis, +) where {mT} + nlso = get_objective(dmp) + return copyto!(J, nlso.jacobian!!(get_manifold(dmp), p; basis_domain=basis_domain)) +end +function get_jacobian!( + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, + J, + p, + basis_domain::AbstractBasis, +) where {mT} + nlso = get_objective(dmp) + return nlso.jacobian!!(get_manifold(dmp), J, p; basis_domain=basis_domain) +end + # TODO: Replace once we have the Jacobian implemented 
function get_gradient_from_Jacobian!( M::AbstractManifold, @@ -251,6 +271,8 @@ Create a smoothing function from a symbol `s`. If the smoothing is already a [`ManifoldHessianObjective`](@ref), this is returned unchanged. All generated objectives are [`AllocatingEvaluation`](@ref). + + # Currently available smoothing functions | `Symbol` | ``ρ(s)`` | ``ρ'(s)`` | ``ρ''(s)[X]`` | Comment | diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 615f6b4ac8..a97c27bb0f 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -219,6 +219,7 @@ function initialize_solver!( ) where {mT<:AbstractManifold} M = get_manifold(dmp) get_objective(dmp).f(M, lms.residual_values, lms.p) + # TODO: Replace with a call to the Jacobian get_gradient_from_Jacobian!(M, lms.X, get_objective(dmp), lms.p, lms.jacF) return lms end @@ -231,25 +232,8 @@ function _maybe_get_basis(M::AbstractManifold, p, B::AbstractBasis) end end -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - jacF, - p, - basis_domain::AbstractBasis, -) where {mT} - nlso = get_objective(dmp) - return copyto!(jacF, nlso.jacobian!!(get_manifold(dmp), p; basis_domain=basis_domain)) -end -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - jacF, - p, - basis_domain::AbstractBasis, -) where {mT} - nlso = get_objective(dmp) - return nlso.jacobian!!(get_manifold(dmp), jacF, p; basis_domain=basis_domain) -end - +# TODO: Adapt to vectorial function call instead of .f - maybe even skip or rename? +# TODO: It will just ne “get_value” of the vgf. 
function get_residuals!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, residuals, @@ -268,15 +252,16 @@ end function step_solver!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, - k::Integer, + ::Integer, ) where {mT<:AbstractManifold} # `o.residual_values` is either initialized by `initialize_solver!` or taken from the previous iteration M = get_manifold(dmp) nlso = get_objective(dmp) + # TODO: Replace with obtaining a basis from the vectorial function basis_ox = _maybe_get_basis(M, lms.p, nlso.jacobian_tangent_basis) # a new Jacobian is only needed if the last step was successful if lms.last_step_successful - get_jacobian!(dmp, lms.jacF, lms.p, basis_ox) + get_jacobian!(dmp, lms.jacF, lms.p) end λk = lms.damping_term * norm(lms.residual_values)^2 From 3135c1b4982d28f6789753f79c40cff997cfe2ef Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Thu, 26 Dec 2024 22:44:01 +0100 Subject: [PATCH 10/34] Implement get_jacobian and get_jacobian! for vector functions. adapt the vectorial plan such that the functions are ordered alphabetically. 
--- src/plans/nonlinear_least_squares_plan.jl | 1 - src/plans/vectorial_plan.jl | 795 ++++++++++++++-------- src/solvers/LevenbergMarquardt.jl | 2 +- 3 files changed, 526 insertions(+), 272 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 29368e420a..14791be463 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -61,7 +61,6 @@ function get_cost( return 1//2 * norm(residual_values)^2 end - function get_jacobian!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, J, diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 9fd4e3a6c1..298e9dcc81 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -10,8 +10,8 @@ abstract type AbstractVectorialType end CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType A type to indicate that gradient of the constraints is implemented as a -Jacobian matrix with respect to a certain basis, that is if the constraints are -given as ``g: \mathcal M → ℝ^m`` with respect to a basis ``\mathcal B`` of ``T_p\mathcal M``, at ``p∈ \mathcal M`` +Jacobian matrix with respect to a certain basis, that is if the vectfor function +are is ``f: \mathcal M → ℝ^m`` with and we have a asis ``\mathcal B`` of ``T_p\mathcal M``, at ``p∈ \mathcal M`` This can be written as ``J_g(p) = (c_1^{\mathrm{T}},…,c_m^{\mathrm{T}})^{\mathrm{T}} \in ℝ^{m,d}``, that is, every row ``c_i`` of this matrix is a set of coefficients such that `get_coefficients(M, p, c, B)` is the tangent vector ``\oepratorname{grad} g_i(p)`` @@ -54,9 +54,14 @@ struct ComponentVectorialType <: AbstractVectorialType end A type to indicate that constraints are implemented one whole functions, for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M)^m``. 
+ +This type internally stores the [`AbstractPowerRepresentation`](@ref), +when it makes sense, especially for Hessian and gradient functions. """ struct FunctionVectorialType <: AbstractVectorialType end +# TODO store range type in this type instead of having a keyword argument in so many function. + @doc raw""" AbstractVectorFunction{E, FT} <: Function @@ -181,10 +186,10 @@ function VectorGradientFunction( ) end -@doc raw""" +_doc_vhf = """ VectorHessianFunction{E, FT, JT, HT, F, J, H, I} <: AbstractVectorGradientFunction{E, FT, JT} -Represent a function ``f:\mathcal M → ℝ^n`` including it first derivative, +Represent a function ``f:$(_math(:M)) M → ℝ^n`` including it first derivative, either as a vector of gradients of a Jacobian, and the Hessian, as a vector of Hessians of the component functions. @@ -195,11 +200,11 @@ or a single tangent space of the power manifold of lenth `n`. * `value!!::F`: the cost function ``f``, which can take different formats * `cost_type::`[`AbstractVectorialType`](@ref): indicating / string data for the type of `f` -* `jacobian!!::G`: the Jacobian of ``f`` +* `jacobian!!::G`: the Jacobian ``J_f`` of ``f`` * `jacobian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``J_f`` * `hessians!!::H`: the Hessians of ``f`` (in a component wise sense) * `hessian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``H_f`` -* `parameters`: the number `n` from, the size of the vector ``f`` returns. +* `range_dimension`: the number `n` from, the size of the vector ``f`` returns. # Constructor @@ -216,6 +221,8 @@ Their types are specified by the `function_type`, and `jacobian_type`, and `hess respectively. The Jacobian and Hessian can further be given as an allocating variant or an inplace-variant, specified by the `evaluation=` keyword. 
""" + +@doc "$(_doc_vhf)" struct VectorHessianFunction{ E<:AbstractEvaluationType, FT<:AbstractVectorialType, @@ -259,54 +266,505 @@ function VectorHessianFunction( ) end -@doc raw""" - get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p[, i=:]) +_vgf_index_to_length(b::BitVector, n) = sum(b) +_vgf_index_to_length(::Colon, n) = n +_vgf_index_to_length(i::AbstractArray{<:Integer}, n) = length(i) +_vgf_index_to_length(r::UnitRange{<:Integer}, n) = length(r) -Evaluate the vector function [`VectorGradientFunction`](@ref) `vgf` at `p`. -The `range` can be used to specify a potential range, but is currently only present for consistency. +# +# +# ---- Hessian +@doc raw""" + get_hessian(M::AbstractManifold, vgf::VectorHessianFunction, p, X, i) + get_hessian(M::AbstractManifold, vgf::VectorHessianFunction, p, X, i, range) + get_hessian!(M::AbstractManifold, X, vgf::VectorHessianFunction, p, X, i) + get_hessian!(M::AbstractManifold, X, vgf::VectorHessianFunction, p, X, i, range) -The `i` can be a linear index, you can provide +Evaluate the Hessians of the vector function `vgf` on the manifold `M` at `p` in direction `X` +and the values given in `range`, specifying the representation of the gradients. 
+Since `i` is assumed to be a linear index, you can provide * a single integer * a `UnitRange` to specify a range to be returned like `1:3` * a `BitVector` specifying a selection * a `AbstractVector{<:Integer}` to specify indices -* `:` to return the vector of all gradients, which is also the default - +* `:` to return the vector of all gradients """ -get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i) -function get_value( - M::AbstractManifold, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=: -) where {E<:AbstractEvaluationType} - c = vgf.value!!(M, p) - if isa(c, Number) - return c - else - return c[i] +get_hessian( + M::AbstractManifold, + vgf::VectorHessianFunction, + p, + X, + i, + range::Union{AbstractPowerRepresentation,Nothing}=nothing, +) + +# Generic case, allocate (a) a single tangent vector +function get_hessian( + M::AbstractManifold, + vhf::VectorHessianFunction, + p, + X, + i::Integer, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) + Y = zero_vector(M, p) + return get_hessian!(M, Y, vhf, p, X, i, range) +end +# (b) UnitRange and AbstractVector allow to use length for BitVector its sum +function get_hessian( + M::AbstractManifold, + vhf::VectorHessianFunction, + p, + X, + i=:, # as long as the length can be found it should work, see _vgf_index_to_length + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + P = fill(p, pM) + Y = zero_vector(pM, P) + return get_hessian!(M, Y, vhf, p, X, i, range) +end + +# +# +# Part I: allocation +# I (a) a vector of functions +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, + p, + X, + i::Integer, + ::Union{AbstractPowerRepresentation,Nothing}=nothing, +) where {FT,JT} + return copyto!(M, Y, p, vhf.hessians!![i](M, p, X)) +end +function get_hessian!( + 
M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, + p, + X, + i, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + rep_size = representation_size(M) + # In the resulting `X` the indices are linear, + # in `jacobian[i]` the functions f are ordered in a linear sense + for (j, f) in zip(1:n, vhf.hessians!![i]) + copyto!(M, _write(pM, rep_size, Y, (j,)), f(M, p, X)) end + return Y end -function get_value( +function get_hessian!( M::AbstractManifold, - vgf::AbstractVectorFunction{E,<:ComponentVectorialType}, + Y, + vgf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, p, + X, + i::Colon, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + n = _vgf_index_to_length(i, vgf.range_dimension) + pM = PowerManifold(M, range, n) + rep_size = representation_size(M) + for (j, f) in enumerate(vgf.hessians!!) 
+ copyto!(M, _write(pM, rep_size, Y, (j,)), p, f(M, p, X)) + end + return Y +end +# Part I(c) A single gradient function +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType}, + p, + X, + i, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + mP = PowerManifold(M, range, n) + copyto!(mP, Y, vhf.hessians!!(M, p, X)[mP, i]) + return Y +end +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType}, + p, + X, i::Integer, -) where {E<:AbstractEvaluationType} - return vgf.value!![i](M, p) + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + mP = PowerManifold(M, range, vhf.range_dimension) + copyto!(M, Y, p, vhf.hessians!!(M, p, X)[mP, i]) + return Y end -function get_value( - M::AbstractManifold, vgf::AbstractVectorFunction{E,<:ComponentVectorialType}, p, i=: -) where {E<:AbstractEvaluationType} - return [f(M, p) for f in vgf.value!![i]] +# +# +# Part II: in-place evaluations +# (a) a vector of functions +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType}, + p, + X, + i::Integer, + ::Union{AbstractPowerRepresentation,Nothing}=nothing, +) where {FT,JT} + return vhf.hessians!![i](M, Y, p, X) +end +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType}, + p, + X, + i, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + n = _vgf_index_to_length(i, vhf.range_dimension) + pM = PowerManifold(M, range, n) + rep_size = representation_size(M) + # In the resulting X the indices are linear, + # in jacobian[i] have the functions f are also given n a linear sense + for (j, f) in 
zip(1:n, vhf.hessians!![i]) + f(M, _write(pM, rep_size, Y, (j,)), p, X) + end + return Y +end +# II(b) a single function +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType}, + p, + X, + i::Integer, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + pM = PowerManifold(M, range, vhf.range_dimension...) + P = fill(p, pM) + y = zero_vector(pM, P) + vhf.hessians!!(M, y, p, X) + copyto!(M, Y, p, y[pM, i]) + return Y +end +function get_hessian!( + M::AbstractManifold, + Y, + vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType}, + p, + X, + i, + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,JT} + #Single access for function is a bit expensive + n = _vgf_index_to_length(i, vhf.range_dimension) + pM_out = PowerManifold(M, range, n) + pM_temp = PowerManifold(M, range, vhf.range_dimension) + P = fill(p, pM_temp) + y = zero_vector(pM_temp, P) + vhf.hessians!!(M, y, p, X) + # Luckily all documented access functions work directly on `x[pM_temp,...]` + copyto!(pM_out, Y, P[pM_temp, i], y[pM_temp, i]) + return Y end -@doc raw""" - get_value_function(vgf::VectorGradientFunction, recursive=false) +get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.hessians!! -return the internally stored function computing [`get_value`](@ref). +# +# +# ---- Jacobian +_doc_get_jacobian = """ + get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; basis = get_javobian_basis(vgf) ) + get_jacobian!(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; basis = get_javobian_basis(vgf) ) + +Evaluate the Jacobian of the vector function `vgf` on the manifold `M` at `p`. +Let ``n`` be the manifold dimension of `M`. 
+Representing every gradient ``$(_tex(:grad)) f_i(p) ∈ $(_math(:TpM))` as a vector of coefficients +``c_i = (c_{i,j})_{j=0}^n`` with respect to some basis, the Jacobian is given by + +```math + J_f = $(_tex(:pmatrix, + "c_{1,1} & $(_tex(:cdots)) & c_{1,n}", + "$(_tex(:vdots)) & $(_tex(:ddots)) & $(_tex(:vdots))", + "c_{n,1} & $(_tex(:cdots)) & c_{n,n}" + )) ∈ ℝ^{n×n}, +``` + +in other words, the Jacobian consists of the gradients in coordinates stored row-wise. + +For the [`ComponentVectorialType`](@ref) and the [`FunctionVectorialType`](@ref), +this is done in the tangent space basis provided by `basis=` directly. +For the [`CoordinateVectorialType`](@ref) the Jacobian is computed in the basis of this type, +if the basis does not agree with the `basis=`, a change of basis is performed + +# Keyword Arguments +* `basis` basis with respect to which the Jacobian is computed. +* `range` for the [`FunctionVectorialType`](@ref) specify the range type of the resulting gradient, + that is, the [`AbstractPowerRepresentation`](@extref `ManifoldsBase.AbstractPowerRepresentation`) the resulting power manifold has. """ -function get_value_function(vgf::VectorGradientFunction, recursive=false) - return vgf.value!! + +function get_jacobian end +@doc "$(_doc_get_jacobian)" +get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p) + +# A small helper function to change the basis of a Jacobian +""" + _change_basis!(M::AbstractManifold, p, JF, from_basis::B1, to_basis::B; X=zero_vector(M,p)) + +Given a jacobian matrix `JF` on a manifold `M` at `p` with respect to the `from_basis` +in the tangent space of `p` on `M`. Change the basis of the Jacobian to `to_basis` in place of `JF`. 
+ +# Keyword Arguments +* `X` a temporary vector to store a generated vector, before decomposing it again with respect to the new basis +""" +function _change_basis!( + M, p, JF, from_basis::B1, to_basis::B2; X=zero_vector(M, p) +) where {B1<:AbstractBasis,B2<:AbstractBasis} + # change every row to new basis + for i in 1:size(JF, 1) # every row + get_vector!(M, X, p, JF[i, :], from_basis) + get_coordinates!(M, JF[i, :], p, X, to_basis) + end + return JF end +# case we have the same basis: nothing to do, just return JF +_change_basis!(M, p, JF, from_basis::B, to_basis_new) where {B<:AbstractBasis} = JF + +# Part I: allocating vgf – allocating jacobian +# (a) We have a single gradient function +function get_jacobian( + M::AbstractManifold, + vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, + p; + basis::B=DefaultOrthonormalBasis(), + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,B<:AbstractBasis} + n = vgf.range_dimension + d = manifold_dimension(M, p) + gradients = vgf.jacobian!!(M, p) + mP = PowerManifold(M, range, vgf.range_dimension) + # generate the first row to get an eltype + c1 = get_coordinates(M, p, gradients[mP, 1], basis) + JF = zeros(eltype(c1), n, d) + JF[1, :] .= c1 + for i in 2:n + JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + end + return JF +end +# (b) We have a vector of gradient functions +function get_jacobian( + M::AbstractManifold, + vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, + p; + basis=get_jacobian_basis(vgf), + kwargs..., +) where {FT} + n = vgf.range_dimension + d = manifold_dimension(M, p) + # generate the first row to get an eltype + c1 = get_coordinates(M, p, vgf.jacobian!![1](M, p), basis) + JF = zeros(eltype(c1), n, d) + JF[1, :] .= c1 + for i in 2:n + JF[i, :] .= get_coordinates(M, p, vgf.jacobian!![i](M, p), basis) + end + return JF +end +# (c) We have a Jacobian function +function 
get_jacobian( + M::AbstractManifold, + vgf::AbstractVectorGradientFunction{ + <:AllocatingEvaluation,FT,<:CoordinateVectorialType + }, + p; + basis=get_jacobian_basis(vgf), + kwargs..., +) where {FT} + JF = vgf.jacobian!!(M, p) + _change_basis!(JF, vgf.jacobian_type.basis, basis) + return JF +end + +# Part II: mutating vgf – allocating jacobian +# (a) We have a single gradient function +function get_jacobian( + M::AbstractManifold, + vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, + p; + basis::B=DefaultOrthonormalBasis(), + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,B<:AbstractBasis} + n = vgf.range_dimension + d = manifold_dimension(M, p) + mP = PowerManifold(M, range, vgf.range_dimension) + gradients = zero_vector(mP, fill(p, pM)) + vgf.jacobian!!(M, gradients, p) + # generate the first row to get an eltype + c1 = get_coordinates(M, p, gradients[mP, 1], basis) + JF = zeros(eltype(c1), n, d) + JF[1, :] .= c1 + for i in 2:n + JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + end + return JF +end +# (b) We have a vector of gradient functions +function get_jacobian( + M::AbstractManifold, + vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, + p; + basis=get_jacobian_basis(vgf), +) where {FT} + n = vgf.range_dimension + d = manifold_dimension(M, p) + # generate the first row to get an eltype + X = zero_vector(M, p) + vgf.jacobian!![1](M, X, p) + c1 = get_coordinates(M, p, X, basis) + JF = zeros(eltype(c1), n, d) + JF[1, :] .= c1 + for i in 2:n + vgf.jacobian!![i](M, X, p) + JF[i, :] .= get_coordinates(M, p, X, basis) + end + return JF + JF = vgf.jacobian!!(M, p) + _change_basis!(JF, vgf.jacobian_type.basis, basis) + return JF +end +# (c) We have a Jacobian function +function get_jacobian( + M::AbstractManifold, + vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, + p; + basis=get_jacobian_basis(vgf), 
+) where {FT} + c = get_coordinats(M, p, zero_vector(M, p)) + JF = zeros(eltype(c), vgf.range_dimension, manifold_dimension(M, p)) + vgf.jacobian!!(M, JF, p) + _change_basis!(JF, vgf.jacobian_type.basis, basis) + return JF +end + +function get_jacobian! end +@doc "$(_doc_get_jacobian)" +get_jacobian!(M::AbstractManifold, JF, vgf::AbstractVectorGradientFunction, p) + +# Part I: allocating vgf – inplace jacobian +# (a) We have a single gradient function +function get_jacobian!( + M::AbstractManifold, + JF, + vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, + p; + basis::B=DefaultOrthonormalBasis(), + range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), +) where {FT,B<:AbstractBasis} + n = vgf.range_dimension + d = manifold_dimension(M, p) + gradients = vgf.jacobian!!(M, p) + mP = PowerManifold(M, range, vgf.range_dimension) + for i in 1:n + JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + end + return JF +end +# (b) We have a vector of gradient functions +function get_jacobian!( + M::AbstractManifold, + JF, + vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, + p; + basis=get_jacobian_basis(vgf), +) where {FT} + for i in 1:(vgf.range_dimension) + JF[i, :] .= get_coordinates(M, p, vgf.jacobian!![i](M, p), basis) + end + return JF +end +# (c) We have a Jacobian function +function get_jacobian!( + M::AbstractManifold, + JF, + vgf::AbstractVectorGradientFunction{ + <:AllocatingEvaluation,FT,<:CoordinateVectorialType + }, + p; + basis=get_jacobian_basis(vgf), +) where {FT} + JF .= vgf.jacobian!!(M, p) + _change_basis!(JF, vgf.jacobian_type.basis, basis) + return JF +end + +# Part II: mutating vgf – allocating jacobian +# (a) We have a single gradient function +function get_jacobian!( + M::AbstractManifold, + JF, + vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, + p; + basis::B=DefaultOrthonormalBasis(), +) where 
{FT,B<:AbstractBasis} + gradients = zero_vector(mP, fill(p, pM)) + vgf.jacobian!!(M, gradients, p) + for i in 1:n + JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + end + return JF +end +# (b) We have a vector of gradient functions +function get_jacobian!( + M::AbstractManifold, + JF, + vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, + p; + basis=get_jacobian_basis(vgf), + X=zero_vector(M, p), +) where {FT} + for i in 1:n + vgf.jacobian!![i](M, X, p) + JF[i, :] .= get_coordinates(M, p, X, basis) + end + return JF + JF = vgf.jacobian!!(M, p) + _change_basis!(JF, vgf.jacobian_type.basis, basis) + return JF +end +# (c) We have a Jacobian function +function get_jacobian!( + M::AbstractManifold, + JF, + vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, + p; + basis=get_jacobian_basis(vgf), +) where {FT} + vgf.jacobian!!(M, JF, p) + _change_basis!(JF, vgf.jacobian_type.basis, basis) + return JF +end + +get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultBasis() +function get_jacobian_basis( + vgf::AbstractVectorGradientFunction{F,G,<:CoordinateVectorialType} +) where {F,G} + return vgf.jacobian_type.basis +end + +# +# +# ---- Gradient @doc raw""" get_gradient(M::AbstractManifold, vgf::VectorGradientFunction, p, i) get_gradient(M::AbstractManifold, vgf::VectorGradientFunction, p, i, range) @@ -328,13 +786,8 @@ get_gradient( vgf::AbstractVectorGradientFunction, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=nothing, -) - -_vgf_index_to_length(b::BitVector, n) = sum(b) -_vgf_index_to_length(::Colon, n) = n -_vgf_index_to_length(i::AbstractArray{<:Integer}, n) = length(i) -_vgf_index_to_length(r::UnitRange{<:Integer}, n) = length(r) + range::Union{AbstractPowerRepresentation,Nothing}=nothing, +) # Generic case, allocate (a) a single tangent vector function get_gradient( @@ -608,252 +1061,54 @@ get_gradient_function(vgf::VectorGradientFunction, recursive=false) = 
vgf.jacobi # # -# ---- Hessian +# ---- Value @doc raw""" - get_hessian(M::AbstractManifold, vgf::VectorHessianFunction, p, X, i) - get_hessian(M::AbstractManifold, vgf::VectorHessianFunction, p, X, i, range) - get_hessian!(M::AbstractManifold, X, vgf::VectorHessianFunction, p, X, i) - get_hessian!(M::AbstractManifold, X, vgf::VectorHessianFunction, p, X, i, range) + get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p[, i=:]) -Evaluate the Hessians of the vector function `vgf` on the manifold `M` at `p` in direction `X` -and the values given in `range`, specifying the representation of the gradients. +Evaluate the vector function [`VectorGradientFunction`](@ref) `vgf` at `p`. +The `range` can be used to specify a potential range, but is currently only present for consistency. + +The `i` can be a linear index, you can provide -Since `i` is assumed to be a linear index, you can provide * a single integer * a `UnitRange` to specify a range to be returned like `1:3` * a `BitVector` specifying a selection * a `AbstractVector{<:Integer}` to specify indices -* `:` to return the vector of all gradients +* `:` to return the vector of all gradients, which is also the default """ -get_hessian( - M::AbstractManifold, - vgf::VectorHessianFunction, - p, - X, - i, - range::Union{AbstractPowerRepresentation,Nothing}=nothing, -) - -# Generic case, allocate (a) a single tangent vector -function get_hessian( - M::AbstractManifold, - vhf::VectorHessianFunction, - p, - X, - i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) - Y = zero_vector(M, p) - return get_hessian!(M, Y, vhf, p, X, i, range) -end -# (b) UnitRange and AbstractVector allow to use length for BitVector its sum -function get_hessian( - M::AbstractManifold, - vhf::VectorHessianFunction, - p, - X, - i=:, # as long as the length can be found it should work, see _vgf_index_to_length - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) - n = 
_vgf_index_to_length(i, vhf.range_dimension) - pM = PowerManifold(M, range, n) - P = fill(p, pM) - Y = zero_vector(pM, P) - return get_hessian!(M, Y, vhf, p, X, i, range) -end - -# -# -# Part I: allocation -# I (a) a vector of functions -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, - p, - X, - i::Integer, - ::Union{AbstractPowerRepresentation,Nothing}=nothing, -) where {FT,JT} - return copyto!(M, Y, p, vhf.hessians!![i](M, p, X)) -end -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, - p, - X, - i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - n = _vgf_index_to_length(i, vhf.range_dimension) - pM = PowerManifold(M, range, n) - rep_size = representation_size(M) - # In the resulting `X` the indices are linear, - # in `jacobian[i]` the functions f are ordered in a linear sense - for (j, f) in zip(1:n, vhf.hessians!![i]) - copyto!(M, _write(pM, rep_size, Y, (j,)), f(M, p, X)) - end - return Y -end -function get_hessian!( - M::AbstractManifold, - Y, - vgf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, - p, - X, - i::Colon, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - n = _vgf_index_to_length(i, vgf.range_dimension) - pM = PowerManifold(M, range, n) - rep_size = representation_size(M) - for (j, f) in enumerate(vgf.hessians!!) 
- copyto!(M, _write(pM, rep_size, Y, (j,)), p, f(M, p, X)) - end - return Y -end -# Part I(c) A single gradient function -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType}, - p, - X, - i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - n = _vgf_index_to_length(i, vhf.range_dimension) - mP = PowerManifold(M, range, n) - copyto!(mP, Y, vhf.hessians!!(M, p, X)[mP, i]) - return Y -end -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:FunctionVectorialType}, - p, - X, - i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - mP = PowerManifold(M, range, vhf.range_dimension) - copyto!(M, Y, p, vhf.hessians!!(M, p, X)[mP, i]) - return Y -end -# -# -# Part II: in-place evaluations -# (a) a vector of functions -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType}, - p, - X, - i::Integer, - ::Union{AbstractPowerRepresentation,Nothing}=nothing, -) where {FT,JT} - return vhf.hessians!![i](M, Y, p, X) -end -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:ComponentVectorialType}, - p, - X, - i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - n = _vgf_index_to_length(i, vhf.range_dimension) - pM = PowerManifold(M, range, n) - rep_size = representation_size(M) - # In the resulting X the indices are linear, - # in jacobian[i] have the functions f are also given n a linear sense - for (j, f) in zip(1:n, vhf.hessians!![i]) - f(M, _write(pM, rep_size, Y, (j,)), p, X) +get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i) +function get_value( + M::AbstractManifold, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=: +) where 
{E<:AbstractEvaluationType} + c = vgf.value!!(M, p) + if isa(c, Number) + return c + else + return c[i] end - return Y end -# II(b) a single function -function get_hessian!( +function get_value( M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType}, + vgf::AbstractVectorFunction{E,<:ComponentVectorialType}, p, - X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - pM = PowerManifold(M, range, vhf.range_dimension...) - P = fill(p, pM) - y = zero_vector(pM, P) - vhf.hessians!!(M, y, p, X) - copyto!(M, Y, p, y[pM, i]) - return Y -end -function get_hessian!( - M::AbstractManifold, - Y, - vhf::VectorHessianFunction{<:InplaceEvaluation,FT,JT,<:FunctionVectorialType}, - p, - X, - i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), -) where {FT,JT} - #Single access for function is a bit expensive - n = _vgf_index_to_length(i, vhf.range_dimension) - pM_out = PowerManifold(M, range, n) - pM_temp = PowerManifold(M, range, vhf.range_dimension) - P = fill(p, pM_temp) - y = zero_vector(pM_temp, P) - vhf.hessians!!(M, y, p, X) - # Luckily all documented access functions work directly on `x[pM_temp,...]` - copyto!(pM_out, Y, P[pM_temp, i], y[pM_temp, i]) - return Y +) where {E<:AbstractEvaluationType} + return vgf.value!![i](M, p) end - -get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.hessians!! 
- -get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultBasis() -function get_jacobian_basis( - vgf::AbstractVectorGradientFunction{F,G,<:CoordinateVectorialType} -) where {F,G} - return vgf.jacobian_type.basis +function get_value( + M::AbstractManifold, vgf::AbstractVectorFunction{E,<:ComponentVectorialType}, p, i=: +) where {E<:AbstractEvaluationType} + return [f(M, p) for f in vgf.value!![i]] end -_doc_get_jacobian = """ - get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; basis = get_javobian_basis(vgf) ) - get_jacobian!(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; basis = get_javobian_basis(vgf) ) - -Evaluate the Jacobian of the vector function `vgf` on the manifold `M` at `p`. -Let ``n`` be the manifold dimension of `M`. -Representing every gradient ``$(_tex(:grad)) f_i(p) ∈ $(_math(:TpM))` as a vector of coefficients -``c_i = (c_{i,j})_{j=0}^n`` with respect to some basis, the Jacobian is given by - -```math - J_f = $(_tex(:pmatrix, - "c_{1,1} & $(_tex(:cdots)) & c_{1,n}", - "$(_tex(:vdots)) & $(_tex(:ddots)) & $(_tex(:vdots))", - "c_{n,1} & $(_tex(:cdots)) & c_{n,n}" - )) ∈ ℝ^{n×n}, -``` - -in other words, the Jacobian consists of the gradients in coordinates stored row-wise. +@doc raw""" + get_value_function(vgf::VectorGradientFunction, recursive=false) -For the [`ComponentVectorialType`](@ref) and the [`FunctionVectorialType`](@ref), -this is done in the tangent space basis provided by `basis=` directly. -For the [`CoordinateVectorialType`](@ref) the Jacobian is compiuted in the basis of this type, -if the basis does not agree with the `basis=`, a change of basis is performed. +return the internally stored function computing [`get_value`](@ref). """ - -function get_jacobian end -@doc "$(_doc_get_jacobian)" -get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p) - -function get_jacobian! 
end - -# TODO: Implement a get_jacobian function that works with respect to a certain basis +function get_value_function(vgf::VectorGradientFunction, recursive=false) + return vgf.value!! +end @doc raw""" length(vgf::AbstractVectorFunction) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index a97c27bb0f..d184135462 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -233,7 +233,7 @@ function _maybe_get_basis(M::AbstractManifold, p, B::AbstractBasis) end # TODO: Adapt to vectorial function call instead of .f - maybe even skip or rename? -# TODO: It will just ne “get_value” of the vgf. +# TODO: It will just ne “get_value” of the vgf - adapted by smoothing?. function get_residuals!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, residuals, From 494743d870677f71814f771d404bcaa70db6d981 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Fri, 27 Dec 2024 11:10:22 +0100 Subject: [PATCH 11/34] Fix docs, return to a 1/a^2 scaling. --- docs/src/plans/objective.md | 2 ++ src/documentation_glossary.jl | 6 +++--- src/plans/nonlinear_least_squares_plan.jl | 2 +- src/solvers/LevenbergMarquardt.jl | 9 ++------- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md index 91d7a5954d..01c1f5c6b7 100644 --- a/docs/src/plans/objective.md +++ b/docs/src/plans/objective.md @@ -257,6 +257,7 @@ Manopt.FunctionVectorialType ```@docs Manopt.get_jacobian +Manopt.get_jacobian! Manopt.get_value Manopt.get_value_function Base.length(::VectorGradientFunction) @@ -266,6 +267,7 @@ Base.length(::VectorGradientFunction) ```@docs Manopt._to_iterable_indices +Manopt._change_basis! 
``` ### Subproblem objective diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 5f45660691..ee4d53cad5 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -268,13 +268,13 @@ define!( :NonLinearLeastSquares, (; M="M", p="p") -> """ ```math -$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) ρ$(_tex(:bigl))( $(_tex(:sum))_{i=1}^n $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) $(_tex(:sum))_{i=1}^n s_i$(_tex(:bigl))($(_tex(:bigl))( $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) ``` where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, and each component function is continuously differentiable. -The function ``ρ: ℝ → ℝ`` can be seen as smoothing or regularisation of the least squares term. -It is assumed to be twice continuously differentiable and its default is ``ρ(x) = x``. +The function ``s_i: ℝ → ℝ`` can be seen as smoothing or regularisation of the least squares term. +It is assumed to be twice continuously differentiable and its default is ``s_i(x) = x`` for all ``i=1,…n``. """, ) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 14791be463..e16c1df22c 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -278,7 +278,7 @@ All generated objectives are [`AllocatingEvaluation`](@ref). |:-------- |:-----:|:------:|:-------:|:------- | | `:Identity` | ``s`` | ``1`` | ``0`` | No smoothing, the default | -As well as a scaled variant for any ``ρ`` as ``ρ_α(s) = α^2 ρ$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))``. +As well as a scaled variant for any ``ρ`` as ``ρ_α(s) = α ρ$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))``. 
which yields ``ρ_α'(s) = ρ'$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))`` and ``ρ_α''(s)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))ρ''$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))[X]``. Scaling is activated by calling `smoothing_factory((symbol, α))` or `smoothing_factory((o, α))`. diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index d184135462..830af00d3a 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -1,8 +1,3 @@ -_doc_LM_formula = raw""" -```math -\operatorname*{arg\,min}_{p ∈ \mathcal M} \frac{1}{2} \lVert f(p) \rVert^2, -``` -""" _doc_LM = """ LevenbergMarquardt(M, f, jacobian_f, p, num_components=-1; kwargs...) LevenbergMarquardt(M, vgf, p; kwargs...) @@ -23,7 +18,7 @@ The second signature performs the optimization in-place of `p`. $(_var(:Argument, :M; type=true)) * `f`: a cost function ``f: $(_math(:M))→ℝ^d``. The cost function can be provided in two different ways - * as a single function returning a vector ``f(p)) ∈ ℝ^d`` + * as a single function returning a vector ``f(p) ∈ ℝ^d`` * as a vector of functions, where each single function returns a scalar ``f_i(p) ∈ ℝ`` The type is determined by the `function_type=` keyword argument. * `jacobian_f`: the Jacobian of ``f``. @@ -57,8 +52,8 @@ $(_var(:Keyword, :evaluation)) * `initial_residual_values`: the initial residual vector of the cost function `f`. By default this is a vector of length `num_components` of similar type as `p`. * `jacobian_type=`[`FunctionVectorialType`](@ref)`: an [`AbstractVectorialType`](@ref) specifying the type of Jacobian provided. -* `smoothing`: specify the function ``ρ`` as an [`ManifoldHessianObjective`](@ref) object. $(_var(:Keyword, :retraction_method)) +* `smoothing`: specify a smoothing function ``s`` for all ``s_i=s`` as an [`ManifoldHessianObjective`](@ref) or a vector of smoothing functions ``(s_1,…s_n)`` together as a [`VectorHessianFunction`](@ref). 
$(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(200)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-12)`")) $(_note(:OtherKeywords)) From ce1637961bff4738d6c41ba63a1cbbbd4d4db02e Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 28 Dec 2024 10:01:15 +0100 Subject: [PATCH 12/34] Make cases inline and suitable for tables. Finish a smoothing factory. --- src/Manopt.jl | 1 + src/documentation_glossary.jl | 5 +- src/plans/nonlinear_least_squares_plan.jl | 152 +++++++++++++++++++--- 3 files changed, 135 insertions(+), 23 deletions(-) diff --git a/src/Manopt.jl b/src/Manopt.jl index 3e40120b8d..5e073ac9a1 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -389,6 +389,7 @@ export get_state, forward_operator!, get_objective, get_unconstrained_objective +export smoothing_factory export get_hessian, get_hessian! export ApproxHessianFiniteDifference export is_state_decorator, dispatch_state_decorator diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index ee4d53cad5..45583702c0 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -66,9 +66,7 @@ define!( :cases, (c...) 
-> raw"\begin{cases}" * - "\n" * - "$(join([" $(ci)" for ci in c], raw"\\\\"*"\n"))" * - "\n" * + "$(join([" $(ci)" for ci in c], raw"\\\\ "))" * raw"\end{cases}", ) define!(:LaTeX, :cdots, raw"\cdots") @@ -93,6 +91,7 @@ define!(:LaTeX, :prox, raw"\operatorname{prox}") define!(:LaTeX, :quad, raw"\quad") define!(:LaTeX, :reflect, raw"\operatorname{refl}") define!(:LaTeX, :retr, raw"\operatorname{retr}") +define!(:LaTeX, :sqrt, (s) -> raw"\sqrt{" * "$s}") define!(:LaTeX, :subgrad, raw"∂") define!(:LaTeX, :sum, raw"\sum") define!(:LaTeX, :transp, (letter) -> raw"\text{" * "$letter" * "}") diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index e16c1df22c..ea074c6d50 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -262,47 +262,159 @@ mutable struct LevenbergMarquardtState{ end """ - smoothing_factory(s::Symbol) - smoothing_factory((s,α)::Tuple{Symbol,<:Real}) + smoothing_factory(s::Symbol=:Identity) + smoothing_factory((s,α)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Real}) + smoothing_factory((s,k)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Int}) + smoothing_factory(S::NTuple{n, <:Union{Symbol, Tuple{S, Int} S<: Tuple{Symbol, <:Real}}} where n) smoothing_factory(o::ManifoldHessianObjective) Create a smoothing function from a symbol `s`. -If the smoothing is already a [`ManifoldHessianObjective`](@ref), this is returned unchanged. -All generated objectives are [`AllocatingEvaluation`](@ref). +For a single symbol `s`, the corresponding smoothing function is returned as a [`ManifoldHessianObjective`](@ref) +If the argument already is a [`ManifoldHessianObjective`](@ref), it is returned unchanged. 
+For a tuple `(s, α)`, the smoothing function is scaled by `α` as ``s_α(x) = α s$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))``,
+which yields ``s_α'(x) = s'$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))`` and ``s_α''(x)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))s''$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))[X]``.
+
+For a tuple `(s, k)`, a [`VectorHessianFunction`](@ref) is returned, where every component is the smoothing function indicated by `s`
+
+Finally for a tuple containing the above four cases, a [`VectorHessianFunction`](@ref) is returned,
+containing all smoothing functions with their repetitions mentioned
+
+# Examples
+
+* `smoothing_factory(:Identity)`: returns the identity function as a single smoothing function
+* `smoothing_factory(:Identity, 2)`: returns a `VectorHessianFunction` with two identity functions
+* `smoothing_factory(mho, 0.5)`: returns a `ManifoldHessianObjective` with the scaled variant of the given `mho`, for example the one returned in the first example
+* `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a `VectorHessianFunction` with 5 components, the first 2 `:Identity` the last 3 `:Huber`
 
 # Currently available smoothing functions
 
-| `Symbol` | ``ρ(s)`` | ``ρ'(s)`` | ``ρ''(s)[X]`` | Comment |
+| `Symbol` | ``s(x)`` | ``s'(x)`` | ``s''(x)`` | Comment |
 |:-------- |:-----:|:------:|:-------:|:------- |
-| `:Identity` | ``s`` | ``1`` | ``0`` | No smoothing, the default |
-
-As well as a scaled variant for any ``ρ`` as ``ρ_α(s) = α ρ$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))``.
-which yields ``ρ_α'(s) = ρ'$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))`` and ``ρ_α''(s)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))ρ''$(_tex(:bigl))($(_tex(:frac, "s", "α^2"))$(_tex(:bigr)))[X]``.
+| `:Identity` | ``x`` | ``1`` | ``0`` | No smoothing, the default |
+| `:Huber` | ``$(_tex(:cases, "x & $(_tex(:text, " for ")) x ≤ 1", "2$(_tex(:sqrt, "x")) - 1 & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "1 & $(_tex(:text, " for ")) x ≤ 1", "$(_tex(:frac, "1", _tex(:sqrt, "x"))) & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "0 & $(_tex(:text, " for ")) x ≤ 1", "-$(_tex(:frac, "1", "2x^{3/2}")) & $(_tex(:text, " for ")) x > 1"))`` | |
+| `:Tukey` | ``$(_tex(:cases, "$(_tex(:frac, "1", "3")) (1-(1-x)^3) & $(_tex(:text, " for ")) x ≤ 1", "$(_tex(:frac, "1", "3")) & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "(1-x)^2 & $(_tex(:text, " for ")) x ≤ 1", "0 & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "2(x-1) & $(_tex(:text, " for ")) x ≤ 1", "0 & $(_tex(:text, " for ")) x > 1"))`` | |
 
-Scaling is activated by calling `smoothing_factory((symbol, α))` or `smoothing_factory((o, α))`.
+Note that in the implementation the second derivative follows the general scheme of hessians
+and actually implements ``s''(x)[X] = s''(x)X``.
 """
 function smoothing_factory(s) end
 
 smoothing_factory() = smoothing_factory(:Identity)
-smoothing_factiory(s::ManifoldHessianObjective) = s
-smoothing_factory(s::Symbol) = smoothing_factory(Val(s))
-function smoothing_factory(::Val{:Identity})
-    return ManifoldHessianObjective((E, x) -> x, (E, x) -> one(x), (E, x, X) -> zero(X))
+smoothing_factory(o::ManifoldHessianObjective) = o
+smoothing_factory(o::VectorHessianFunction) = o
+function smoothing_factory(s::Symbol)
+    return ManifoldHessianObjective(_smoothing_factory(Val(s))...)
end function smoothing_factory((s, α)::Tuple{Symbol,<:Real}) - o = smoothing_factory(s) - return smoothing_factory((o, α)) + s, s_p, s_pp = _smoothing_factory(s, α) + return ManifoldHessianObjective(s, s_p, s_pp) end function smoothing_factory((o, α)::Tuple{ManifoldHessianObjective,<:Real}) - return ManifoldHessianObjective( - (E, x) -> α^2 * get_cost(E, o, x / α^2), - (E, x) -> get_gradient(E, o, x / α^2), - (E, x, X) -> get_hessian(E, o, x / α^2, X) / α^2, + s, s_p, s_pp = _smoothing_factory(o, α) + return ManifoldHessianObjective(s, s_p, s_pp) +end +function smoothing_factory((s, k)::Tuple{Symbol,<:Int}) + s, s_p, s_pp = _smoothing_factory(s, k) + return VectorHessianFunction( + s, + s_p, + s_pp, + k; + function_type=ComponentVectorialType(), + jacobian_type=ComponentVectorialType(), + hessian_type=ComponentVectorialType(), + ) +end +function smoothing_factory((o, k)::Tuple{ManifoldHessianObjective,<:Int}) + s, s_p, s_pp = _smoothing_factory(o, k) + return VectorHessianFunction( + s, + s_p, + s_pp, + k; + function_type=ComponentVectorialType(), + jacobian_type=ComponentVectorialType(), + hessian_type=ComponentVectorialType(), + ) +end +function smoothing_factory( + S::NTuple{ + n, + <:Union{ + Symbol, + ManifoldHessianObjective, + Tuple{Symbol,<:Int}, + Tuple{Symbol,<:Real}, + Tuple{ManifoldHessianObjective,<:Int}, + Tuple{ManifoldHessianObjective,<:Real}, + }, + } where {n}, +) + s = Function[] + s_p = Function[] + s_pp = Function[] + # collect all functions including their copies into a large vector + for t in S + _s, _s_p, _s_pp = _smoothing_factory(t...) + push!(s, _s...) + push!(s_p, _s_p...) + push!(s_pp, _s_pp...) 
+    end
+    k = length(s)
+    return VectorHessianFunction(
+        s,
+        s_p,
+        s_pp,
+        k;
+        function_type=ComponentVectorialType(),
+        jacobian_type=ComponentVectorialType(),
+        hessian_type=ComponentVectorialType(),
     )
 end
+# Inner functions that split any smoothing function into its ρ, ρ' and ρ'' parts
+function _smoothing_factory(o::ManifoldHessianObjective)
+    return (E, x) -> get_cost(E, o, x),
+    (E, x) -> get_gradient(E, o, x),
+    (E, x, X) -> get_hessian(E, o, x, X)
+end
+function _smoothing_factory(o::ManifoldHessianObjective, α::Real)
+    return (E, x) -> α^2 * get_cost(E, o, x / α^2),
+    (E, x) -> get_gradient(E, o, x / α^2),
+    (E, x, X) -> get_hessian(E, o, x / α^2, X) / α^2
+end
+function _smoothing_factory(s::Symbol, α::Real)
+    s, s_p, s_pp = _smoothing_factory(Val(s))
+    return (E, x) -> α^2 * s(E, x / α^2),
+    (E, x) -> s_p(E, x / α^2),
+    (E, x, X) -> s_pp(E, x / α^2, X) / α^2
+end
+function _smoothing_factory(o::ManifoldHessianObjective, k::Int)
+    return fill((E, x) -> get_cost(E, o, x), k),
+    fill((E, x) -> get_gradient(E, o, x), k),
+    fill((E, x, X) -> get_hessian(E, o, x, X), k)
+end
+function _smoothing_factory(s::Symbol, k::Int)
+    s, s_p, s_pp = _smoothing_factory(Val(s))
+    return fill(s, k), fill(s_p, k), fill(s_pp, k)
+end
+# Library
+function _smoothing_factory(::Val{:Identity})
+    return (E, x) -> x, (E, x) -> one(x), (E, x, X) -> zero(X)
+end
+function _smoothing_factory(::Val{:Huber})
+    return (E, x) -> x <= 1 ? x : 2 * sqrt(x) - 1,
+    (E, x) -> x <= 1 ? 1 : 1 / sqrt(x),
+    (E, x, X) -> (x <= 1 ? 0 : -1 / (2x^(3 / 2))) * X
+end
+function _smoothing_factory(::Val{:Tukey})
+    return (E, x) -> x <= 1 ? 1 / 3 * (1 - (1 - x)^3) : 1 / 3,
+    (E, x) -> x <= 1 ? (1 - x)^2 : 0,
+    (E, x, X) -> (x <= 1 ? 2 * (x - 1) : 0) * X
+end
+# TODO: Vectorial cases: (symbol, int)
 function show(io::IO, lms::LevenbergMarquardtState)
     i = get_count(lms, :Iterations)
     Iter = (i > 0) ?
"After $i iterations\n" : "" From 346d7f502297a2b903d44fa888d2c47dda6f850c Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 28 Dec 2024 14:57:49 +0100 Subject: [PATCH 13/34] Rework the remaining code to provide vgf fully and have the chain rule with smoothing implemented. --- src/plans/nonlinear_least_squares_plan.jl | 261 ++++++++++++++++------ src/plans/vectorial_plan.jl | 89 +++++--- src/solvers/LevenbergMarquardt.jl | 55 ++--- 3 files changed, 258 insertions(+), 147 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index ea074c6d50..39dc35834b 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -11,7 +11,7 @@ Specify a nonlinear least squares problem # Fields * `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` -* `smoothing`: a [`ManifoldHessianObjective`](@ref) of a smoothing function ``ρ: ℝ → ℝ``, hence including its first and second derivatives ``ρ'`` and ``ρ''``. +* `smoothing`: a [`ManifoldHessianObjective`](@ref) or a [`Vector of a smoothing function ``ρ: ℝ → ℝ``, hence including its first and second derivatives ``ρ'`` and ``ρ''``. This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` as the (inner) `objective. @@ -36,99 +36,219 @@ struct NonlinearLeastSquaresObjective{ smoothing::R end -# TODO: Write the ease-of-use constructor. 
+function NonlinearLeastSquaresObjective( + vgf::F; smoothing=:Identity +) where {F<:AbstractVectorGradientFunction} + s = smoothing_factory(smoothing) + return NonlinearLeastSquaresObjective(vgf, s) +end # Cost -# (a) with ρ +# (a) with for a single smoothing function function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E, - AbstractVectorFunction{E,<:FunctionVectorialType}, - <:AbstractManifoldHessianObjective, + E,AbstractVectorFunction{E,<:ComponentVectorialType},H }, p; vector_space=Rn, + kwargs..., +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + v = 0.0 + for i in 1:length(nlso.objective) + v += get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2) + end + return v +end +function get_cost( + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},H + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + return sum( + get_cost(vector_space(1), nlso.smoothing, value_cache[i]) for + i in 1:length(value_cache) + ) +end +# (b) vectorial ρ +function get_cost( + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + kwargs..., ) where {E<:AbstractEvaluationType} - return get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p)^2) + v = 0.0 + for i in 1:length(nlso.objective) + v += get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2, i) + end + return v end -# (b) without ρ function get_cost( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p -) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - return 1//2 * norm(residual_values)^2 + M::AbstractManifold, + nlso::NonlinearLeastSquaresObjective{ + 
E,AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType} + return sum( + get_cost(vector_space(1), nlso.smoothing, value_cache[i], i) for + i in 1:length(value_cache) + ) end -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - J, - p, - basis_domain::AbstractBasis, +function get_jacobian( + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, p; kwargs... ) where {mT} nlso = get_objective(dmp) - return copyto!(J, nlso.jacobian!!(get_manifold(dmp), p; basis_domain=basis_domain)) + M = get_manifold(dmp) + J = zeros(length(nlso.objective), manifold_dimension(M)) + get_jacobian!(M, J, nlso, p; kwargs...) + return J end function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - J, - p, - basis_domain::AbstractBasis, + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, J, p; kwargs... ) where {mT} nlso = get_objective(dmp) - return nlso.jacobian!!(get_manifold(dmp), J, p; basis_domain=basis_domain) + M = get_manifold(dmp) + get_jacobian!(M, J, nlso, p; kwargs...) + return J end -# TODO: Replace once we have the Jacobian implemented -function get_gradient_from_Jacobian!( - M::AbstractManifold, - X, - nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, - p, - Jval=zeros(nlso.num_components, manifold_dimension(M)), +function get_jacobian( + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_p = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - nlso.jacobian!!(M, Jval, p; basis_domain=basis_p) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - get_vector!(M, X, p, transpose(Jval) * residual_values, basis_p) - return X + J = zeros(length(nlso.objective), manifold_dimension(M)) + get_jacobian!(M, J, nlso, p; kwargs...) 
+ return J end - -function get_gradient( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p -) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = nlso.jacobian!!(M, p; basis_domain=basis_x) - residual_values = nlso.f(M, p) - return get_vector(M, p, transpose(Jval) * residual_values, basis_x) +# Cases: (a) single smoothing function +function get_jacobian!( + M::AbstractManifold, + J, + nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractManifoldGradientObjective}, + p; + value_cache=get_value(M, nlso.objective, p), + kwargs..., +) where {E,AHVF} + get_jacobian!(M, J, nlso.objective, p; kwargs...) + for i in 1:length(nlso.objective) # s'(f_i(p)) * f_i'(p) + J[i, :] .*= get_gradient(vector_space(1), nlso.smoothing, value_cache[i]) + end + return J +end +# Cases: (b) vectorial smoothing function +function get_jacobian!( + M::AbstractManifold, + J, + nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractVectorGradientFunction}, + p; + basis::AbstractBasis=get_jacobian_basis(nlso.objective), + value_cache=get_value(M, nlso.objective, p), +) where {E,AHVF} + get_jacobian!(M, J, nlso.objective, p; basis=basis) + for i in 1:length(nlso.objective) # s_i'(f_i(p)) * f_i'(p) + J[i, :] .*= get_gradient(vector_space(1), nlso.smoothing, value_cache[i], i) + end + return J end + function get_gradient( - M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = zeros(nlso.num_components, manifold_dimension(M)) - nlso.jacobian!!(M, Jval, p; basis_domain=basis_x) - residual_values = zeros(nlso.num_components) - nlso.f(M, residual_values, p) - return get_vector(M, p, transpose(Jval) * residual_values, basis_x) + X = zero_vector(M, p) + return get_gradient!(M, X, nlso, p; kwargs...) 
end - function get_gradient!( - M::AbstractManifold, X, nlso::NonlinearLeastSquaresObjective{AllocatingEvaluation}, p + M::AbstractManifold, + X, + nlso::NonlinearLeastSquaresObjective, + p; + basis=get_jacobian_basis(nlso.objective), + jacobian_cache=get_jacobian(M, nlso, p; basis=basis), + value_cache=get_residuals(M, nlso, p), ) - basis_x = _maybe_get_basis(M, p, nlso.jacobian_tangent_basis) - Jval = nlso.jacobian!!(M, p; basis_domain=basis_x) - residual_values = nlso.f(M, p) - return get_vector!(M, X, p, transpose(Jval) * residual_values, basis_x) + return get_vector!(M, X, p, transpose(jacobian_cache) * value_cache, basis) end -function get_gradient!( - M::AbstractManifold, X, nlso::NonlinearLeastSquaresObjective{InplaceEvaluation}, p +# Residuals +# (a) with for a single smoothing function +function get_residuals( + M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) - get_gradient_from_Jacobian!(M, X, nlso, p) - return X + V = zeros(length(nlso.objective)) + return get_residuals!(M, V, nlso, p; kwargs...) 
+end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},H + }, + p; + vector_space=Rn, + kwargs..., +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + for i in 1:length(nlso.objective) + V[i] = get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2) + end + return V +end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},H + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} + for i in 1:length(value_cache) + V[i] = get_cost(vector_space(1), nlso.smoothing, value_cache[i]) + end + return V +end +# (b) vectorial ρ +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + kwargs..., +) where {E<:AbstractEvaluationType} + for i in 1:length(nlso.objective) + V[i] = get_cost( + vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2, i + ) + end + return V +end +function get_residuals!( + M::AbstractManifold, + V, + nlso::NonlinearLeastSquaresObjective{ + E,AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + }, + p; + vector_space=Rn, + value_cache=get_value(M, nlso.objective, p), +) where {E<:AbstractEvaluationType} + for i in 1:length(value_cache) + V[i] = get_cost(vector_space(1), nlso.smoothing, value_cache[i], i) + end + return V end @doc """ @@ -145,7 +265,7 @@ $(_var(:Field, :retraction_method)) * `residual_values`: value of ``F`` calculated in the solver setup or the previous iteration * `residual_values_temp`: value of ``F`` for the current proposal point $(_var(:Field, :stopping_criterion, "stop")) -* `jacF`: the current Jacobian of ``F`` +* 
`jacobian`: the current Jacobian of ``F`` * `gradient`: the current gradient of ``F`` * `step_vector`: the tangent vector at `x` that is used to move to the next point * `last_stepsize`: length of `step_vector` @@ -160,7 +280,7 @@ $(_var(:Field, :stopping_criterion, "stop")) # Constructor - LevenbergMarquardtState(M, initial_residual_values, initial_jacF; kwargs...) + LevenbergMarquardtState(M, initial_residual_values, initial_jacobian; kwargs...) Generate the Levenberg-Marquardt solver state. @@ -194,7 +314,7 @@ mutable struct LevenbergMarquardtState{ retraction_method::TRTM residual_values::Tresidual_values candidate_residual_values::Tresidual_values - jacF::TJac + jacobian::TJac X::TGrad step_vector::TGrad last_stepsize::Tparams @@ -207,7 +327,7 @@ mutable struct LevenbergMarquardtState{ function LevenbergMarquardtState( M::AbstractManifold, initial_residual_values::Tresidual_values, - initial_jacF::TJac; + initial_jacobian::TJac; p::P=rand(M), X::TGrad=zero_vector(M, p), stopping_criterion::StoppingCriterion=StopAfterIteration(200) | @@ -247,7 +367,7 @@ mutable struct LevenbergMarquardtState{ retraction_method, initial_residual_values, copy(initial_residual_values), - initial_jacF, + initial_jacobian, X, allocate(M, X), zero(Tparams), @@ -277,6 +397,7 @@ For a tuple `(s, α)`, the smoothing function is scaled by `α` as ``s_α(x) = which yields ``s_α'(x) = s'$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))`` and ``s_α''(x)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))s''$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))[X]``. For a tuple `(s, k)`, a [`VectorHessianFunction`](@ref) is returned, where every component is the smooting function indicated by `s` +If the argument already is a [`VectorHessianFunction`](@ref), it is returned unchanged. 
Finally for a tuple containing the above four cases, a [`VectorHessianFunction`](@ref) is returned, containing all smoothing functions with their repetitions mentioned @@ -284,9 +405,9 @@ containing all smoothing functions with their repetitions mentioned # Examples * `smoothing_factory(:Identity)`: returns the identity function as a single smoothing function -* `smoothing_factory(:Identity, 2)`: returns a `VectorHessianFunction` with two identity functions -* `smoothing_factory(mho, 0.5)`: returns a `ManifoldHessianObjective` with the scaled variant of the given `mho`, for example the one returned in the first example -* `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a `VectorHessianFunction` with 5 components, the first 2 `:Identity` the last 3 `:Huber` +* `smoothing_factory(:Identity, 2)`: returns a [`VectorHessianFunction`](@ref) with two identity functions +* `smoothing_factory(mho, 0.5)`: returns a [`ManifoldHessianObjective`](@ref) with the scaled variant of the given `mho`, for example the one returned in the first example +* `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a [`VectorHessianFunction`](@ref) with 5 components, the first 2 `:Identity` the last 3 `:Huber` # Currently available smoothing functions diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 298e9dcc81..f9b2c03b6d 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -50,7 +50,7 @@ for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ T_p\mathcal struct ComponentVectorialType <: AbstractVectorialType end @doc raw""" - FunctionVectorialType <: AbstractVectorialType + FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType A type to indicate that constraints are implemented one whole functions, for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M)^m``. 
@@ -58,9 +58,14 @@ for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M) This type internally stores the [`AbstractPowerRepresentation`](@ref), when it makes sense, especially for Hessian and gradient functions. """ -struct FunctionVectorialType <: AbstractVectorialType end +struct FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType + range::P +end + +get_range(vt::FunctionVectorialType) = vt.range +get_range(::AbstractVectorialType) = NestedPowerRepresentation() -# TODO store range type in this type instead of having a keyword argument in so many function. +FunctionVectorialType() = FunctionVectorialType(NestedPowerRepresentation()) @doc raw""" AbstractVectorFunction{E, FT} <: Function @@ -306,7 +311,7 @@ function get_hessian( p, X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) Y = zero_vector(M, p) return get_hessian!(M, Y, vhf, p, X, i, range) @@ -318,7 +323,7 @@ function get_hessian( p, X, i=:, # as long as the length can be found it should work, see _vgf_index_to_length - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) @@ -349,7 +354,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) @@ -368,7 +373,7 @@ function get_hessian!( p, X, i::Colon, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, 
vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -386,7 +391,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vhf.range_dimension) mP = PowerManifold(M, range, n) @@ -400,7 +405,7 @@ function get_hessian!( p, X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} mP = PowerManifold(M, range, vhf.range_dimension) copyto!(M, Y, p, vhf.hessians!!(M, p, X)[mP, i]) @@ -428,7 +433,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) @@ -448,7 +453,7 @@ function get_hessian!( p, X, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} pM = PowerManifold(M, range, vhf.range_dimension...) 
P = fill(p, pM) @@ -464,7 +469,7 @@ function get_hessian!( p, X, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} #Single access for function is a bit expensive n = _vgf_index_to_length(i, vhf.range_dimension) @@ -547,7 +552,7 @@ function get_jacobian( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p; basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension d = manifold_dimension(M, p) @@ -603,7 +608,7 @@ function get_jacobian( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p; basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension d = manifold_dimension(M, p) @@ -665,20 +670,19 @@ get_jacobian!(M::AbstractManifold, JF, vgf::AbstractVectorGradientFunction, p) # (a) We have a single gradient function function get_jacobian!( M::AbstractManifold, - JF, + J, vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p; - basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + basis::B=get_jacobian_basis(vgf), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension - d = manifold_dimension(M, p) gradients = vgf.jacobian!!(M, p) mP = PowerManifold(M, range, vgf.range_dimension) for i in 1:n - JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + J[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) 
end - return JF + return J end # (b) We have a vector of gradient functions function get_jacobian!( @@ -755,7 +759,7 @@ function get_jacobian!( return JF end -get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultBasis() +get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultOrthonormalBasis() function get_jacobian_basis( vgf::AbstractVectorGradientFunction{F,G,<:CoordinateVectorialType} ) where {F,G} @@ -795,7 +799,7 @@ function get_gradient( vgf::AbstractVectorGradientFunction, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) X = zero_vector(M, p) return get_gradient!(M, X, vgf, p, i, range) @@ -806,7 +810,7 @@ function get_gradient( vgf::AbstractVectorGradientFunction, p, i=:, # as long as the length can be found it should work, see _vgf_index_to_length - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -847,7 +851,7 @@ function get_gradient!( }, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} JF = vgf.jacobian!!(M, p) get_vector!(M, X, p, JF[i, :], vgf.jacobian_type.basis) #convert rows to gradients @@ -861,7 +865,7 @@ function get_gradient!( }, p, i=:, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -889,7 +893,7 @@ function get_gradient!( 
vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -907,7 +911,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p, i::Colon, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -924,7 +928,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) mP = PowerManifold(M, range, n) @@ -937,7 +941,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} mP = PowerManifold(M, range, vgf.range_dimension) copyto!(M, X, p, vgf.jacobian!!(M, p)[mP, i]) @@ -953,7 +957,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} # a type wise 
safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) @@ -974,7 +978,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} # a type wise safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) @@ -999,7 +1003,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=nothing, + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} return vgf.jacobian!![i](M, X, p) end @@ -1009,7 +1013,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) @@ -1028,7 +1032,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p, i::Integer, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} pM = PowerManifold(M, range, vgf.range_dimension...) 
P = fill(p, pM) @@ -1043,7 +1047,7 @@ function get_gradient!( vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, p, i, - range::Union{AbstractPowerRepresentation,Nothing}=NestedPowerRepresentation(), + range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT<:AbstractVectorialType} #Single access for function is a bit expensive n = _vgf_index_to_length(i, vgf.range_dimension) @@ -1064,6 +1068,7 @@ get_gradient_function(vgf::VectorGradientFunction, recursive=false) = vgf.jacobi # ---- Value @doc raw""" get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p[, i=:]) + get_value!(M::AbstractManifold, V, vgf::AbstractVectorFunction, p[, i=:]) Evaluate the vector function [`VectorGradientFunction`](@ref) `vgf` at `p`. The `range` can be used to specify a potential range, but is currently only present for consistency. @@ -1075,6 +1080,8 @@ The `i` can be a linear index, you can provide * a `BitVector` specifying a selection * a `AbstractVector{<:Integer}` to specify indices * `:` to return the vector of all gradients, which is also the default + +This function can perform the evalutation inplace of `V`. 
""" get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i) function get_value( @@ -1100,7 +1107,17 @@ function get_value( ) where {E<:AbstractEvaluationType} return [f(M, p) for f in vgf.value!![i]] end - +function get_value!( + M::AbstractManifold, V, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=: +) where {E<:AbstractEvaluationType} + c = vgf.value!!(M, p) + if isa(c, Number) + V .= c + else + V .= c[i] + end + return V +end @doc raw""" get_value_function(vgf::VectorGradientFunction, recursive=false) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 830af00d3a..f5a83bd6a8 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -112,11 +112,10 @@ function LevenbergMarquardt( vgf::VectorGradientFunction, p; evaluation::AbstractEvaluationType=AllocatingEvaluation(), - smoothing=:Idenity, + smoothing=:Identity, kwargs..., ) - _smoothing = smoothing_factory(smoothing) - nlso = NonlinearLeastSquaresObjective(vgf, _smoothing; evaluation=evaluation) + nlso = NonlinearLeastSquaresObjective(vgf; smoothing=smoothing) return LevenbergMarquardt(M, nlso, p; evaluation=evaluation, kwargs...) 
end function LevenbergMarquardt( @@ -171,9 +170,9 @@ function LevenbergMarquardt!( β::Real=5.0, η::Real=0.2, damping_term_min::Real=0.1, - initial_residual_values=similar(p, get_objective(nlso).num_components), + initial_residual_values=similar(p, length(get_objective(nlso).objective)), initial_jacobian_f=similar( - p, get_objective(nlso).num_components, manifold_dimension(M) + p, length(get_objective(nlso).objective), manifold_dimension(M) ), kwargs..., #collect rest ) where {O<:Union{NonlinearLeastSquaresObjective,AbstractDecoratedManifoldObjective}} @@ -204,7 +203,7 @@ function initialize_solver!( lms::LevenbergMarquardtState, ) where {mT<:AbstractManifold} M = get_manifold(dmp) - lms.residual_values = get_objective(dmp).f(M, lms.p) + lms.residual_values = get_value(M, get_objective(dmp).objective, lms.p) lms.X = get_gradient(dmp, lms.p) return lms end @@ -213,37 +212,13 @@ function initialize_solver!( lms::LevenbergMarquardtState, ) where {mT<:AbstractManifold} M = get_manifold(dmp) - get_objective(dmp).f(M, lms.residual_values, lms.p) - # TODO: Replace with a call to the Jacobian - get_gradient_from_Jacobian!(M, lms.X, get_objective(dmp), lms.p, lms.jacF) + nlso = get_objective(dmp) + get_residuals!(M, lms.residual_values, nlso, lms.p) + get_jacobian!(M, lms.jacobian, nlso, lms.p) + get_gradient!(M, lms.X, nlso, lms.p; jacobian_cache=lms.jacobian) return lms end -function _maybe_get_basis(M::AbstractManifold, p, B::AbstractBasis) - if requires_caching(B) - return get_basis(M, p, B) - else - return B - end -end - -# TODO: Adapt to vectorial function call instead of .f - maybe even skip or rename? -# TODO: It will just ne “get_value” of the vgf - adapted by smoothing?. 
-function get_residuals!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - residuals, - p, -) where {mT} - return copyto!(residuals, get_objective(dmp).f(get_manifold(dmp), p)) -end -function get_residuals!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, - residuals, - p, -) where {mT} - return get_objective(dmp).f(get_manifold(dmp), residuals, p) -end - function step_solver!( dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, @@ -252,18 +227,16 @@ function step_solver!( # `o.residual_values` is either initialized by `initialize_solver!` or taken from the previous iteration M = get_manifold(dmp) nlso = get_objective(dmp) - # TODO: Replace with obtaining a basis from the vectorial function - basis_ox = _maybe_get_basis(M, lms.p, nlso.jacobian_tangent_basis) # a new Jacobian is only needed if the last step was successful if lms.last_step_successful - get_jacobian!(dmp, lms.jacF, lms.p) + get_jacobian!(dmp, lms.jacobian, lms.p) end λk = lms.damping_term * norm(lms.residual_values)^2 - JJ = transpose(lms.jacF) * lms.jacF + λk * I + JJ = transpose(lms.jacobian) * lms.jacobian + λk * I # `cholesky` is technically not necessary but it's the fastest method to solve the # problem because JJ is symmetric positive definite - grad_f_c = transpose(lms.jacF) * lms.residual_values + grad_f_c = transpose(lms.jacobian) * lms.residual_values sk = cholesky(JJ) \ -grad_f_c get_vector!(M, lms.X, lms.p, grad_f_c, basis_ox) @@ -272,11 +245,11 @@ function step_solver!( temp_x = retract(M, lms.p, lms.step_vector, lms.retraction_method) normFk2 = norm(lms.residual_values)^2 - get_residuals!(dmp, lms.candidate_residual_values, temp_x) + get_value!(M, lms.candidate_residual_values, nlso.objective, temp_x) ρk = (normFk2 - norm(lms.candidate_residual_values)^2) / ( - -2 * inner(M, lms.p, lms.X, lms.step_vector) - norm(lms.jacF * sk)^2 - + -2 * inner(M, lms.p, lms.X, 
lms.step_vector) - norm(lms.jacobian * sk)^2 - λk * norm(sk)^2 ) if ρk >= lms.η From 59b41a3b2a2ad4b6ee1e88ca1cb16310f4e528e4 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 28 Dec 2024 15:09:07 +0100 Subject: [PATCH 14/34] Simplify code a bit. --- src/solvers/LevenbergMarquardt.jl | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index f5a83bd6a8..b4765dcdd0 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -199,16 +199,7 @@ end # Solver functions # function initialize_solver!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{AllocatingEvaluation}}, - lms::LevenbergMarquardtState, -) where {mT<:AbstractManifold} - M = get_manifold(dmp) - lms.residual_values = get_value(M, get_objective(dmp).objective, lms.p) - lms.X = get_gradient(dmp, lms.p) - return lms -end -function initialize_solver!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective{InplaceEvaluation}}, + dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, lms::LevenbergMarquardtState, ) where {mT<:AbstractManifold} M = get_manifold(dmp) @@ -229,10 +220,10 @@ function step_solver!( nlso = get_objective(dmp) # a new Jacobian is only needed if the last step was successful if lms.last_step_successful - get_jacobian!(dmp, lms.jacobian, lms.p) + get_jacobian!(M, lms.jacobian, nlso, lms.p) end λk = lms.damping_term * norm(lms.residual_values)^2 - + basis_ox = get_jacobian_basis(nlso.objective) JJ = transpose(lms.jacobian) * lms.jacobian + λk * I # `cholesky` is technically not necessary but it's the fastest method to solve the # problem because JJ is symmetric positive definite From ede72a3af94f6a3705afea0864b6bb13e4fbe6a1 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sat, 28 Dec 2024 15:15:14 +0100 Subject: [PATCH 15/34] debug dispatch. 
--- src/plans/vectorial_plan.jl | 63 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index f9b2c03b6d..24c7e13fb0 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -671,11 +671,15 @@ get_jacobian!(M::AbstractManifold, JF, vgf::AbstractVectorGradientFunction, p) function get_jacobian!( M::AbstractManifold, J, - vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, + vgf::VGF, p; basis::B=get_jacobian_basis(vgf), range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), -) where {FT,B<:AbstractBasis} +) where { + FT, + VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, + B<:AbstractBasis, +} n = vgf.range_dimension gradients = vgf.jacobian!!(M, p) mP = PowerManifold(M, range, vgf.range_dimension) @@ -686,12 +690,11 @@ function get_jacobian!( end # (b) We have a vector of gradient functions function get_jacobian!( - M::AbstractManifold, - JF, - vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, - p; - basis=get_jacobian_basis(vgf), -) where {FT} + M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf) +) where { + FT, + VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, +} for i in 1:(vgf.range_dimension) JF[i, :] .= get_coordinates(M, p, vgf.jacobian!![i](M, p), basis) end @@ -699,14 +702,13 @@ function get_jacobian!( end # (c) We have a Jacobian function function get_jacobian!( - M::AbstractManifold, - JF, - vgf::AbstractVectorGradientFunction{ + M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf) +) where { + FT, + VGF<:AbstractVectorGradientFunction{ <:AllocatingEvaluation,FT,<:CoordinateVectorialType }, - p; - basis=get_jacobian_basis(vgf), -) where {FT} +} JF .= vgf.jacobian!!(M, p) _change_basis!(JF, vgf.jacobian_type.basis, 
basis) return JF @@ -715,12 +717,12 @@ end # Part II: mutating vgf – allocating jacobian # (a) We have a single gradient function function get_jacobian!( - M::AbstractManifold, - JF, - vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, - p; - basis::B=DefaultOrthonormalBasis(), -) where {FT,B<:AbstractBasis} + M::AbstractManifold, JF, vgf::VGF, p; basis::B=DefaultOrthonormalBasis() +) where { + FT, + VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, + B<:AbstractBasis, +} gradients = zero_vector(mP, fill(p, pM)) vgf.jacobian!!(M, gradients, p) for i in 1:n @@ -730,13 +732,10 @@ function get_jacobian!( end # (b) We have a vector of gradient functions function get_jacobian!( - M::AbstractManifold, - JF, - vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, - p; - basis=get_jacobian_basis(vgf), - X=zero_vector(M, p), -) where {FT} + M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf), X=zero_vector(M, p) +) where { + FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType} +} for i in 1:n vgf.jacobian!![i](M, X, p) JF[i, :] .= get_coordinates(M, p, X, basis) @@ -748,12 +747,10 @@ function get_jacobian!( end # (c) We have a Jacobian function function get_jacobian!( - M::AbstractManifold, - JF, - vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, - p; - basis=get_jacobian_basis(vgf), -) where {FT} + M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf) +) where { + FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType} +} vgf.jacobian!!(M, JF, p) _change_basis!(JF, vgf.jacobian_type.basis, basis) return JF From b7bff5e4794a4972ca3d1362fcf00b946e5ce7a8 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sun, 29 Dec 2024 16:18:51 +0100 Subject: [PATCH 16/34] Documentation and working on making this internal representation change nonbreaking. 
--- src/Manopt.jl | 2 +- src/plans/nonlinear_least_squares_plan.jl | 69 +++++++++-- src/plans/vectorial_plan.jl | 140 ++++++++++++++-------- src/solvers/LevenbergMarquardt.jl | 9 +- 4 files changed, 153 insertions(+), 67 deletions(-) diff --git a/src/Manopt.jl b/src/Manopt.jl index 5e073ac9a1..c4cd219012 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -11,7 +11,7 @@ module Manopt import Base: &, copy, getindex, identity, length, setindex!, show, | import LinearAlgebra: reflect! import ManifoldsBase: embed!, plot_slope, prepare_check_result, find_best_slope_window -import ManifoldsBase: base_manifold, base_point +import ManifoldsBase: base_manifold, base_point, get_basis using ColorSchemes using ColorTypes using Colors diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 39dc35834b..a2200e8847 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -36,6 +36,28 @@ struct NonlinearLeastSquaresObjective{ smoothing::R end +# TODO document +function NonlinearLeastSquaresObjective( + f, + jacobian, + range_dimension; + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), + jacobian_type=CoordinateVectorialType(jacobian_tangent_basis), + function_type=FunctionVectorialType(), + kwargs..., +) + vgf = VectorGradientFunction( + f, + jacobian, + range_dimension; + evaluation=evaluation, + jacobian_type=jacobian_type, + function_type=function_type, + ) + return NonlinearLeastSquaresObjective(vgf; kwargs...) 
+end + function NonlinearLeastSquaresObjective( vgf::F; smoothing=:Identity ) where {F<:AbstractVectorGradientFunction} @@ -48,7 +70,7 @@ end function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:ComponentVectorialType},H + E,<:AbstractVectorFunction{E,<:ComponentVectorialType},H }, p; vector_space=Rn, @@ -63,7 +85,7 @@ end function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:FunctionVectorialType},H + E,<:AbstractVectorFunction{E,<:FunctionVectorialType},H }, p; vector_space=Rn, @@ -78,7 +100,7 @@ end function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction }, p; vector_space=Rn, @@ -93,7 +115,7 @@ end function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction }, p; vector_space=Rn, @@ -136,6 +158,7 @@ function get_jacobian!( J, nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractManifoldGradientObjective}, p; + vector_space=Rn, value_cache=get_value(M, nlso.objective, p), kwargs..., ) where {E,AHVF} @@ -151,7 +174,7 @@ function get_jacobian!( J, nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractVectorGradientFunction}, p; - basis::AbstractBasis=get_jacobian_basis(nlso.objective), + basis::AbstractBasis=get_basis(nlso.objective.jacobian_type), value_cache=get_value(M, nlso.objective, p), ) where {E,AHVF} get_jacobian!(M, J, nlso.objective, p; basis=basis) @@ -172,14 +195,34 @@ function get_gradient!( X, nlso::NonlinearLeastSquaresObjective, p; - basis=get_jacobian_basis(nlso.objective), + basis=get_basis(nlso.objective.jacobian_type), jacobian_cache=get_jacobian(M, nlso, p; basis=basis), 
value_cache=get_residuals(M, nlso, p), ) return get_vector!(M, X, p, transpose(jacobian_cache) * value_cache, basis) end -# Residuals +# +# +# --- Residuals +_doc_get_residuals_nlso = """ + get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p) + get_residuals!(M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective, p) + +Compute the vector of residuals ``s_i(f_i(p))``, ``i=1,…,n`` given the manifold `M`, +the [`NonlinearLeastSquaresObjective`](@ref) `nlso` and a current point ``p`` on `M`. + +# Keyword arguments + +* `vector_space=`[`Rn`](@ref)`: a vector space to use for evaluating the single + smoothing functions ``s_i`` on. +* `value_cache=`[`get_value`](@ref)`(M, nlso.objective, p)`: a cache to provide the + function evaltuation vector of the ``f_i(p)``, ``i=1,…,n`` in. +""" + +@doc "$(_doc_get_residuals_nlso)" +get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs...) + # (a) with for a single smoothing function function get_residuals( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... @@ -187,11 +230,15 @@ function get_residuals( V = zeros(length(nlso.objective)) return get_residuals!(M, V, nlso, p; kwargs...) end + +@doc "$(_doc_get_residuals_nlso)" +get_residuals!(M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective, p; kwargs...) 
+ function get_residuals!( M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:ComponentVectorialType},H + E,<:AbstractVectorFunction{E,<:ComponentVectorialType},H }, p; vector_space=Rn, @@ -206,7 +253,7 @@ function get_residuals!( M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:FunctionVectorialType},H + E,<:AbstractVectorFunction{E,<:FunctionVectorialType},H }, p; vector_space=Rn, @@ -222,7 +269,7 @@ function get_residuals!( M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction }, p; vector_space=Rn, @@ -239,7 +286,7 @@ function get_residuals!( M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective{ - E,AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction }, p; vector_space=Rn, diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 24c7e13fb0..ac3731c7c0 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -26,6 +26,19 @@ struct CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType basis::B end +""" + get_basis(::AbstractVectorialType) + +Return a basis that fits a vector function representation. + +For the case, where some vectorial data is stored with respect to a basis, +this function returns the corresponding basis, most prominently for the [`CoordinateVectorialType`](@ref). 
+ +If a type is not with respect to a certain basis, the [`DefaultOrthonormalBasis`](@ref) is returned +""" +get_basis(::AbstractVectorialType) = DefaultOrthonormalBasis() +get_basis(cvt::CoordinateVectorialType) = cvt.basis + """ _to_iterable_indices(A::AbstractVector, i) @@ -62,6 +75,15 @@ struct FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectoria range::P end +""" + get_range(::AbstractVectorialType) + +Return an abstract power manifold representation that fits a vector function's range. +Most prominently a [`FunctionVectorialType`](@ref) returns its internal range. + +Otherwise the default [`NestedPowerRepresentation`](@ref) is used to work on +a vector of data. +""" get_range(vt::FunctionVectorialType) = vt.range get_range(::AbstractVectorialType) = NestedPowerRepresentation() @@ -369,16 +391,16 @@ end function get_hessian!( M::AbstractManifold, Y, - vgf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, + vhf::VectorHessianFunction{<:AllocatingEvaluation,FT,JT,<:ComponentVectorialType}, p, X, i::Colon, range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.hessian_type), ) where {FT,JT} - n = _vgf_index_to_length(i, vgf.range_dimension) + n = _vgf_index_to_length(i, vhf.range_dimension) pM = PowerManifold(M, range, n) rep_size = representation_size(M) - for (j, f) in enumerate(vgf.hessians!!) + for (j, f) in enumerate(vhf.hessians!!) copyto!(M, _write(pM, rep_size, Y, (j,)), p, f(M, p, X)) end return Y @@ -487,40 +509,7 @@ get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.he # # -# ---- Jacobian -_doc_get_jacobian = """ - get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; basis = get_javobian_basis(vgf) ) - get_jacobian!(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; basis = get_javobian_basis(vgf) ) - -Evaluate the Jacobian of the vector function `vgf` on the manifold `M` at `p`. -Let ``n`` be the manifold dimension of `M`. 
-Representing every gradient ``$(_tex(:grad)) f_i(p) ∈ $(_math(:TpM))` as a vector of coefficients -``c_i = (c_{i,j})_{j=0}^n`` with respect to some basis, the Jacobian is given by - -```math - J_f = $(_tex(:pmatrix, - "c_{1,1} & $(_tex(:cdots)) & c_{1,n}", - "$(_tex(:vdots)) & $(_tex(:ddots)) & $(_tex(:vdots))", - "c_{n,1} & $(_tex(:cdots)) & c_{n,n}" - )) ∈ ℝ^{n×n}, -``` - -in other words, the Jacobian consists of the gradients in coordinates stored row-wise. - -For the [`ComponentVectorialType`](@ref) and the [`FunctionVectorialType`](@ref), -this is done in the tangent space basis provided by `basis=` directly. -For the [`CoordinateVectorialType`](@ref) the Jacobian is computed in the basis of this type, -if the basis does not agree with the `basis=`, a change of basis is performed - -# Keyword Arguments -* `basis` basis with respect to which the Jacobian is computed. -* `range` for the [`FunctionVectorialType`](@ref) specify the range type of the resulting gradient, - that is, the [`AbstractPowerRepresentation`](@extref `ManifoldsBase.AbstractPowerRepresentation`) the resulting power manifold has. -""" - -function get_jacobian end -@doc "$(_doc_get_jacobian)" -get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p) +# --- Jacobian # A small helper function to change the basis of a Jacobian """ @@ -545,14 +534,53 @@ end # case we have the same basis: nothing to do, just return JF _change_basis!(M, p, JF, from_basis::B, to_basis_new) where {B<:AbstractBasis} = JF +_doc_get_jacobian_vgf = """ + get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; kwargs...) + get_jacobian(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; kwargs...) + +Compute the Jacobian ``J_F ∈ ℝ^{m×n}`` of the [`AbstractVectorGradientFunction`](@ref) ``F`` at `p` on the `M`. + +There are two interpretations of the Jacobian of a vectorial function ``F: $(_math(:M)) → ℝ^m`` on a manifold. 
+Both depend on choosing a basis on the tangent space ``$(_math(:TpM))`` which we denote by +``Y_1,…,Y_n``, where `n` is the [`manifold_dimension`](@extref) of `M`. +We can write any tangent vector ``X = $(_tex(:displaystyle))$(_tex(:sum))_i c_iY_i`` + +1. The Jacobian ``J_F`` is the matrix with respect to the basis ``Y_1,…,Y_n`` such that +for any ``X∈$(_math(:TpM))`` we have the equality of the differential ``DF(p)[X] = Jc``. + In other words, the `j`th column of ``J`` is given by ``DF(p)[Y_j]`` + +2. Given the gradients ``$(_tex(:grad)) F_i(p)`` of the component functions ``F_i: $(_math(:M)) → ℝ``, + we define the jacobian function as + + ````math + J(X) = $(_tex(:pmatrix, "⟨$(_tex(:grad)) F_1, X⟩_p", "⟨$(_tex(:grad)) F_1, X⟩_p", "⋮", "⟨$(_tex(:grad)) F_1, X⟩_p")) + ```` + + Then either the ``j``th column of ``J_F`` is given by ``J(Y_i)`` or the ``i``th row is given by all inner products ``$(_tex(:grad)) F_1, Y_j⟩_p`` of the ``i``th gradient function with all basis vectors ``Y_j``. + +The computation can be computed in-place of `J`. + +# Keyword arguments + +* `basis::AbstractBasis = `[`get_basis`](@ref)`(vgf)` + for the [`CoordinateVectorialType`](@ref) of the vectorial functions gradient, this + might lead to a change of basis, if this basis and the one the coordinates are given in do not agree. +* `range::AbstractPowerRepresentation = `[`get_range`](@ref)`(vgf.jacobian_type)` + specify the range of the gradients in the case of a [`FunctionVectorialType`](@ref), + that is, on which type of power manifold the gradient is given on. +""" + +@doc "$(_doc_get_jacobian_vgf)" +get_jacobian(::AbstractManifold, ::AbstractVectorGradientFunction, p; kwargs...) 
+ # Part I: allocating vgf – allocating jacobian # (a) We have a single gradient function function get_jacobian( M::AbstractManifold, vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, p; - basis::B=DefaultOrthonormalBasis(), - range::Union{AbstractPowerRepresentation,Nothing}=get_range(vhf.jacobian_type), + basis::B=get_basis(vgf.jacobian_type), + range::AbstractPowerRepresentation=get_range(vgf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension d = manifold_dimension(M, p) @@ -572,7 +600,7 @@ function get_jacobian( M::AbstractManifold, vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, p; - basis=get_jacobian_basis(vgf), + basis=get_basis(vgf.jacobian_type), kwargs..., ) where {FT} n = vgf.range_dimension @@ -593,7 +621,7 @@ function get_jacobian( <:AllocatingEvaluation,FT,<:CoordinateVectorialType }, p; - basis=get_jacobian_basis(vgf), + basis=get_basis(vgf.jacobian_type), kwargs..., ) where {FT} JF = vgf.jacobian!!(M, p) @@ -629,7 +657,7 @@ function get_jacobian( M::AbstractManifold, vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType}, p; - basis=get_jacobian_basis(vgf), + basis=get_basis(vgf.jacobian_type), ) where {FT} n = vgf.range_dimension d = manifold_dimension(M, p) @@ -653,7 +681,7 @@ function get_jacobian( M::AbstractManifold, vgf::AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType}, p; - basis=get_jacobian_basis(vgf), + basis=get_basis(vgf.jacobian_type), ) where {FT} c = get_coordinats(M, p, zero_vector(M, p)) JF = zeros(eltype(c), vgf.range_dimension, manifold_dimension(M, p)) @@ -663,7 +691,7 @@ function get_jacobian( end function get_jacobian! 
end -@doc "$(_doc_get_jacobian)" +@doc "$(_doc_get_jacobian_vgf)" get_jacobian!(M::AbstractManifold, JF, vgf::AbstractVectorGradientFunction, p) # Part I: allocating vgf – inplace jacobian @@ -673,8 +701,8 @@ function get_jacobian!( J, vgf::VGF, p; - basis::B=get_jacobian_basis(vgf), - range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), + basis::B=get_basis(vgf.jacobian_type), + range::AbstractPowerRepresentation=get_range(vgf.jacobian_type), ) where { FT, VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, @@ -683,14 +711,17 @@ function get_jacobian!( n = vgf.range_dimension gradients = vgf.jacobian!!(M, p) mP = PowerManifold(M, range, vgf.range_dimension) + println("range ", range, "mP", mP) for i in 1:n - J[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) + c = @view J[i, :] + println("G ", gradients) + get_coordinates!(M, c, p, gradients[mP, i], basis) end return J end # (b) We have a vector of gradient functions function get_jacobian!( - M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf) + M::AbstractManifold, JF, vgf::VGF, p; basis=get_basis(vgf.jacobian_type) ) where { FT, VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, @@ -702,7 +733,7 @@ function get_jacobian!( end # (c) We have a Jacobian function function get_jacobian!( - M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf) + M::AbstractManifold, JF, vgf::VGF, p; basis=get_basis(vgf.jacobian_type) ) where { FT, VGF<:AbstractVectorGradientFunction{ @@ -710,7 +741,7 @@ function get_jacobian!( }, } JF .= vgf.jacobian!!(M, p) - _change_basis!(JF, vgf.jacobian_type.basis, basis) + _change_basis!(M, p, JF, vgf.jacobian_type.basis, basis) return JF end @@ -732,7 +763,12 @@ function get_jacobian!( end # (b) We have a vector of gradient functions function get_jacobian!( - M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf), X=zero_vector(M, p) + 
M::AbstractManifold, + JF, + vgf::VGF, + p; + basis=get_basis(vgf.jacobian_type), + X=zero_vector(M, p), ) where { FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType} } @@ -747,7 +783,7 @@ function get_jacobian!( end # (c) We have a Jacobian function function get_jacobian!( - M::AbstractManifold, JF, vgf::VGF, p; basis=get_jacobian_basis(vgf) + M::AbstractManifold, JF, vgf::VGF, p; basis=get_basis(vgf.jacobian_type) ) where { FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType} } diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index b4765dcdd0..887878ea15 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -83,7 +83,7 @@ function LevenbergMarquardt( num_components::Int=-1; evaluation::AbstractEvaluationType=AllocatingEvaluation(), function_type::AbstractVectorialType=FunctionVectorialType(), - jacobian_type::AbstractVectorialType=FunctionVectorialType(), + jacobian_type::AbstractVectorialType=CoordinateVectorialType(DefaultOrthonormalBasis()), kwargs..., ) if num_components == -1 @@ -135,6 +135,8 @@ function LevenbergMarquardt!( num_components::Int=-1; evaluation::AbstractEvaluationType=AllocatingEvaluation(), jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), + jacobian_type=CoordinateVectorialType(jacobian_tangent_basis), + function_type=FunctionVectorialType(), kwargs..., ) if num_components == -1 @@ -153,7 +155,8 @@ function LevenbergMarquardt!( jacobian_f, num_components; evaluation=evaluation, - jacobian_tangent_basis=jacobian_tangent_basis, + jacobian_type=jacobian_type, + function_type=function_type, ) return LevenbergMarquardt!(M, nlso, p; evaluation=evaluation, kwargs...) 
end @@ -223,7 +226,7 @@ function step_solver!( get_jacobian!(M, lms.jacobian, nlso, lms.p) end λk = lms.damping_term * norm(lms.residual_values)^2 - basis_ox = get_jacobian_basis(nlso.objective) + basis_ox = get_basis(nlso.objective.jacobian_type) JJ = transpose(lms.jacobian) * lms.jacobian + λk * I # `cholesky` is technically not necessary but it's the fastest method to solve the # problem because JJ is symmetric positive definite From 4174d9a45d225c458d41de4bc9833afb8b073c25 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sun, 29 Dec 2024 17:22:12 +0100 Subject: [PATCH 17/34] Get tests back up running. --- src/plans/vectorial_plan.jl | 38 ++++++++++++++++++++++++++--- test/plans/test_constrained_plan.jl | 10 +++++--- test/plans/test_vectorial_plan.jl | 7 +++--- 3 files changed, 44 insertions(+), 11 deletions(-) diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index ac3731c7c0..e43a81a1b3 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -788,7 +788,7 @@ function get_jacobian!( FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType} } vgf.jacobian!!(M, JF, p) - _change_basis!(JF, vgf.jacobian_type.basis, basis) + _change_basis!(M, p, JF, vgf.jacobian_type.basis, basis) return JF end @@ -1119,7 +1119,7 @@ This function can perform the evalutation inplace of `V`. 
get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i) function get_value( M::AbstractManifold, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=: -) where {E<:AbstractEvaluationType} +) where {E<:AllocatingEvaluation} c = vgf.value!!(M, p) if isa(c, Number) return c @@ -1140,9 +1140,25 @@ function get_value( ) where {E<:AbstractEvaluationType} return [f(M, p) for f in vgf.value!![i]] end +function get_value( + M::AbstractManifold, + vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, + p, + i=:; + value_cache=zeros(vgf.range_dimension), +) where {E<:InplaceEvaluation} + vgf.value!!(M, value_cache, p) + return value_cache[i] +end +# A ComponentVectorialType Inplace does that make sense, since those would be real - valued functions + function get_value!( - M::AbstractManifold, V, vgf::AbstractVectorFunction{E,<:FunctionVectorialType}, p, i=: -) where {E<:AbstractEvaluationType} + M::AbstractManifold, + V, + vgf::AbstractVectorFunction{AllocatingEvaluation,<:FunctionVectorialType}, + p, + i=:, +) c = vgf.value!!(M, p) if isa(c, Number) V .= c @@ -1151,6 +1167,20 @@ function get_value!( end return V end + +function get_value!( + M::AbstractManifold, + V, + vgf::AbstractVectorFunction{InplaceEvaluation,<:FunctionVectorialType}, + p, + i=:; + value_cache=zeros(vgf.range_dimension), +) + vgf.value!!(M, value_cache, p) + V .= value_cache[i] + return V +end + @doc raw""" get_value_function(vgf::VectorGradientFunction, recursive=false) diff --git a/test/plans/test_constrained_plan.jl b/test/plans/test_constrained_plan.jl index a0fd3842b9..7ebc1362f6 100644 --- a/test/plans/test_constrained_plan.jl +++ b/test/plans/test_constrained_plan.jl @@ -12,6 +12,7 @@ include("../utils/dummy_types.jl") hess_f!(M, Y, p, X) = (Y .= [2.0, 2.0, 2.0]) # Inequality constraints g(M, p) = [p[1] - 1, -p[2] - 1] + g!(M, V, p) = (V .= [p[1] - 1, -p[2] - 1]) # # Function grad_g(M, p) = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] grad_gA(M, p) = [1.0 0.0; 0.0 -1.0; 0.0 0.0] @@ 
-41,6 +42,7 @@ include("../utils/dummy_types.jl") ) == 2 # Equality Constraints h(M, p) = [2 * p[3] - 1] + h!(M, V, p) = (V .= [2 * p[3] - 1]) h1(M, p) = 2 * p[3] - 1 grad_h(M, p) = [[0.0, 0.0, 2.0]] grad_hA(M, p) = [[0.0, 0.0, 2.0];;] @@ -83,9 +85,9 @@ include("../utils/dummy_types.jl") cofm = ConstrainedManifoldObjective( f, grad_f!, - g, + g!, grad_g!, - h, + h!, grad_h!; evaluation=InplaceEvaluation(), inequality_constraints=2, @@ -149,9 +151,9 @@ include("../utils/dummy_types.jl") cofhm = ConstrainedManifoldObjective( f, grad_f!, - g, + g!, grad_g!, - h, + h!, grad_h!; hess_f=hess_f!, hess_g=hess_g!, diff --git a/test/plans/test_vectorial_plan.jl b/test/plans/test_vectorial_plan.jl index da8daf3668..0f3b25a1ef 100644 --- a/test/plans/test_vectorial_plan.jl +++ b/test/plans/test_vectorial_plan.jl @@ -3,6 +3,7 @@ using Manopt: get_value, get_value_function, get_gradient_function @testset "VectorialGradientCost" begin M = ManifoldsBase.DefaultManifold(3) g(M, p) = [p[1] - 1, -p[2] - 1] + g!(M, V, p) = (V .= [p[1] - 1, -p[2] - 1]) # # Function grad_g(M, p) = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] hess_g(M, p, X) = [copy(X), -copy(X)] @@ -37,7 +38,7 @@ using Manopt: get_value, get_value_function, get_gradient_function function_type=ComponentVectorialType(), jacobian_type=ComponentVectorialType(), ) - vgf_fi = VectorGradientFunction(g, grad_g!, 2; evaluation=InplaceEvaluation()) + vgf_fi = VectorGradientFunction(g!, grad_g!, 2; evaluation=InplaceEvaluation()) vgf_vi = VectorGradientFunction( [g1, g2], [grad_g1!, grad_g2!], @@ -50,7 +51,7 @@ using Manopt: get_value, get_value_function, get_gradient_function g, jac_g, 2; jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis()) ) vgf_ji = VectorGradientFunction( - g, + g!, jac_g!, 2; jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis()), @@ -71,7 +72,7 @@ using Manopt: get_value, get_value_function, get_gradient_function jacobian_type=ComponentVectorialType(), hessian_type=ComponentVectorialType(), ) - 
vhf_fi = VectorHessianFunction(g, grad_g!, hess_g!, 2; evaluation=InplaceEvaluation()) + vhf_fi = VectorHessianFunction(g!, grad_g!, hess_g!, 2; evaluation=InplaceEvaluation()) vhf_vi = VectorHessianFunction( [g1, g2], [grad_g1!, grad_g2!], From 2eedd44c3a5ccc08fda707969cdc5aa34d595d9f Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Sun, 29 Dec 2024 17:48:55 +0100 Subject: [PATCH 18/34] fix docs so they at least render. --- docs/src/plans/objective.md | 4 ++++ src/Manopt.jl | 2 ++ src/plans/constrained_plan.jl | 2 +- src/plans/vectorial_plan.jl | 8 ++++---- src/solvers/interior_point_Newton.jl | 2 +- 5 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/src/plans/objective.md b/docs/src/plans/objective.md index 01c1f5c6b7..2d6796107a 100644 --- a/docs/src/plans/objective.md +++ b/docs/src/plans/objective.md @@ -121,6 +121,8 @@ ManifoldCostGradientObjective ```@docs get_gradient get_gradients +get_residuals +get_residuals! ``` and internally @@ -268,6 +270,8 @@ Base.length(::VectorGradientFunction) ```@docs Manopt._to_iterable_indices Manopt._change_basis! +Manopt.get_basis +Manopt.get_range ``` ### Subproblem objective diff --git a/src/Manopt.jl b/src/Manopt.jl index c4cd219012..793befaa7b 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -381,6 +381,8 @@ export get_state, get_differential_dual_prox!, set_gradient!, set_iterate!, + get_residuals, + get_residuals!, linearized_forward_operator, linearized_forward_operator!, adjoint_linearized_operator, diff --git a/src/plans/constrained_plan.jl b/src/plans/constrained_plan.jl index 6edba426a3..875d12efbb 100644 --- a/src/plans/constrained_plan.jl +++ b/src/plans/constrained_plan.jl @@ -332,7 +332,7 @@ components gradients, for example In another interpretation, this can be considered a point on the tangent space at ``P = (p,…,p) \in \mathcal M^m``, so in the tangent space to the [`PowerManifold`](@extref `ManifoldsBase.PowerManifold`) ``\mathcal M^m``. 
-The case where this is a [`NestedPowerRepresentation`](@extref) this agrees with the +The case where this is a [`NestedPowerRepresentation`](@extref `ManifoldsBase.NestedPowerRepresentation`) this agrees with the interpretation from before, but on power manifolds, more efficient representations exist. To then access the elements, the range has to be specified. That is what this diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index e43a81a1b3..95689d6abc 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -34,7 +34,7 @@ Return a basis that fits a vector function representation. For the case, where some vectorial data is stored with respect to a basis, this function returns the corresponding basis, most prominently for the [`CoordinateVectorialType`](@ref). -If a type is not with respect to a certain basis, the [`DefaultOrthonormalBasis`](@ref) is returned +If a type is not with respect to a certain basis, the [`DefaultOrthonormalBasis`](@extref `ManifoldsBase.DefaultOrthonormalBasis`) is returned """ get_basis(::AbstractVectorialType) = DefaultOrthonormalBasis() get_basis(cvt::CoordinateVectorialType) = cvt.basis @@ -68,7 +68,7 @@ struct ComponentVectorialType <: AbstractVectorialType end A type to indicate that constraints are implemented one whole functions, for example ``g(p) ∈ ℝ^m`` or ``\operatorname{grad} g(p) ∈ (T_p\mathcal M)^m``. -This type internally stores the [`AbstractPowerRepresentation`](@ref), +This type internally stores the [`AbstractPowerRepresentation`](@extref `ManifoldsBase.AbstractPowerRepresentation`), when it makes sense, especially for Hessian and gradient functions. """ struct FunctionVectorialType{P<:AbstractPowerRepresentation} <: AbstractVectorialType @@ -81,7 +81,7 @@ end Return an abstract power manifold representation that fits a vector function's range. Most prominently a [`FunctionVectorialType`](@ref) returns its internal range. 
-Otherwise the default [`NestedPowerRepresentation`](@ref) is used to work on +Otherwise the default [`NestedPowerRepresentation`](@extref `ManifoldsBase.NestedPowerRepresentation`)`()` is used to work on a vector of data. """ get_range(vt::FunctionVectorialType) = vt.range @@ -542,7 +542,7 @@ Compute the Jacobian ``J_F ∈ ℝ^{m×n}`` of the [`AbstractVectorGradientFunct There are two interpretations of the Jacobian of a vectorial function ``F: $(_math(:M)) → ℝ^m`` on a manifold. Both depend on choosing a basis on the tangent space ``$(_math(:TpM))`` which we denote by -``Y_1,…,Y_n``, where `n` is the [`manifold_dimension`](@extref) of `M`. +``Y_1,…,Y_n``, where `n` is the $(_link(:manifold_dimension))`(M)`. We can write any tangent vector ``X = $(_tex(:displaystyle))$(_tex(:sum))_i c_iY_i`` 1. The Jacobian ``J_F`` is the matrix with respect to the basis ``Y_1,…,Y_n`` such that diff --git a/src/solvers/interior_point_Newton.jl b/src/solvers/interior_point_Newton.jl index 112774d569..625bcbe3bd 100644 --- a/src/solvers/interior_point_Newton.jl +++ b/src/solvers/interior_point_Newton.jl @@ -58,7 +58,7 @@ $(_var(:Keyword, :evaluation)) * `g=nothing`: the inequality constraints * `grad_g=nothing`: the gradient of the inequality constraints * `grad_h=nothing`: the gradient of the equality constraints -* `gradient_range=nothing`: specify how gradients are represented, where `nothing` is equivalent to [`NestedPowerRepresentation`](@extref) +* `gradient_range=nothing`: specify how gradients are represented, where `nothing` is equivalent to [`NestedPowerRepresentation`](@extref `ManifoldsBase.NestedPowerRepresentation`) * `gradient_equality_range=gradient_range`: specify how the gradients of the equality constraints are represented * `gradient_inequality_range=gradient_range`: specify how the gradients of the inequality constraints are represented * `h=nothing`: the equality constraints From fe5d1c6a77fefcf27f43cdde318760851a6d65a7 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann 
Date: Mon, 30 Dec 2024 13:12:26 +0100 Subject: [PATCH 19/34] Finish docs. --- src/documentation_glossary.jl | 1 + src/plans/manifold_default_factory.jl | 6 +-- src/plans/nonlinear_least_squares_plan.jl | 66 +++++++++++++++++------ src/solvers/LevenbergMarquardt.jl | 3 +- 4 files changed, 56 insertions(+), 20 deletions(-) diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 45583702c0..c6b90244c0 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -91,6 +91,7 @@ define!(:LaTeX, :prox, raw"\operatorname{prox}") define!(:LaTeX, :quad, raw"\quad") define!(:LaTeX, :reflect, raw"\operatorname{refl}") define!(:LaTeX, :retr, raw"\operatorname{retr}") +define!(:LaTeX, :rm, (letter) -> raw"\mathrm{" * "$letter" * "}") define!(:LaTeX, :sqrt, (s) -> raw"\sqrt{" * "$s}") define!(:LaTeX, :subgrad, raw"∂") define!(:LaTeX, :sum, raw"\sum") diff --git a/src/plans/manifold_default_factory.jl b/src/plans/manifold_default_factory.jl index 30f737525f..9fcb3131f7 100644 --- a/src/plans/manifold_default_factory.jl +++ b/src/plans/manifold_default_factory.jl @@ -7,9 +7,9 @@ decission on which manifold to use to a later point For now this is established for -* [`DirectionUpdateRule`](@ref)s (TODO: WIP) -* [`Stepsize`](@ref) (TODO: WIP) -* [`StoppingCriterion`](@ref) (TODO:WIP) +* [`DirectionUpdateRule`](@ref)s +* [`Stepsize`](@ref) +* [`StoppingCriterion`](@ref) This factory stores necessary and optional parameters as well as keyword arguments provided by the user to later produce the type this factory is for. 
diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index a2200e8847..c2c9540973 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -11,17 +11,35 @@ Specify a nonlinear least squares problem # Fields * `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` -* `smoothing`: a [`ManifoldHessianObjective`](@ref) or a [`Vector of a smoothing function ``ρ: ℝ → ℝ``, hence including its first and second derivatives ``ρ'`` and ``ρ''``. +* `smoothing`: a [`ManifoldHessianObjective`](@ref) or a [`Vector of a smoothing function ``s: ℝ → ℝ``, hence including its first and second derivatives ``s'`` and ``s''``. This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` -as the (inner) `objective. -The smoothing is expected to be the smoothing is expected to be [`AllocatingEvaluation`](@ref), -since it works on a one-dimensional vector space ``ℝ`` only anyways. +as the (inner) `objective`. +The smoothing is expected to be be of [`AllocatingEvaluation`](@ref) type, +since it works on a one-dimensional vector space ``ℝ`` or each of its components does. # Constructors - NonlinearLeastSquaresObjective(f_i, grad_f_i, ρ::F, ρ_prime::G, ρ_prime_prime::H) where {F<:Function} - NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction, ρ::Union{ManifoldHessianObjective, VectorHessianFunction}) + NonlinearLeastSquaresObjective(f, jacobian, range_dimension; kwargs...) 
+ NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction, smoothing = :Identity) + +# Arguments + +* `f` the vectorial cost function ``f: $(_math(:M)) → ℝ^m`` +* `jacobian` the Jacobian, might also be a vector of gradients of the component functions of `f` +* `range_dimension` the number of dimensions `m` the function `f` maps into + +These three can also be passed as a [`AbstractVectorGradientFunction`](@ref) `vf` already. +For this second constructor, the `smoothing=` keyword is the second positional argument. + +# Keyword arguments + +$(_var(:Keyword, :evaluation)) +* `function_type::`[`AbstractVectorialType`](@ref)`=`[`FunctionVectorialType`](@ref)`()`: format the +* `jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis()`; shortcut to specify the basis the Jacobian matrix is build with. +* `jacobian_type::`[`AbstractVectorialType`](@ref)`=`[`CoordinateVectorialType`](@ref)`(jacobian_tangent_basis)`: specify the format the Jacobian is given in. By default a matrix of the differential with respect to a certain basis of the tangent space. +* `smoothing::Union{`[`ManifoldHessianObjective`](@ref),[`VectorHessianFunction`](@ref)`,Symbol}=:Identity`: specify the type of smoothing, + either applied to all components, a vector for each component or a symbol passed to the [`smoothing_factory`](@ref) of predefined smoothing types. 
# See also @@ -36,15 +54,14 @@ struct NonlinearLeastSquaresObjective{ smoothing::R end -# TODO document function NonlinearLeastSquaresObjective( f, jacobian, range_dimension; evaluation::AbstractEvaluationType=AllocatingEvaluation(), jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), - jacobian_type=CoordinateVectorialType(jacobian_tangent_basis), - function_type=FunctionVectorialType(), + jacobian_type::AbstractVectorialType=CoordinateVectorialType(jacobian_tangent_basis), + function_type::AbstractVectorialType=FunctionVectorialType(), kwargs..., ) vgf = VectorGradientFunction( @@ -453,16 +470,20 @@ containing all smoothing functions with their repetitions mentioned * `smoothing_factory(:Identity)`: returns the identity function as a single smoothing function * `smoothing_factory(:Identity, 2)`: returns a [`VectorHessianFunction`](@ref) with two identity functions +* `smoothing_factory(:Identity, 2.0)`: returns a [`VectorHessianFunction`](@ref) with the identity ``s(x) = x`` function scaled to ``s_2(x)`` as described above * `smoothing_factory(mho, 0.5)`: returns a [`ManifoldHessianObjective`](@ref) with the scaled variant of the given `mho`, for example the one returned in the first example * `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a [`VectorHessianFunction`](@ref) with 5 components, the first 2 `:Identity` the last 3 `:Huber` # Currently available smoothing functions | `Symbol` | ``s(x)`` | ``s'(x)`` | ``s''(x)`` | Comment | -|:-------- |:-----:|:------:|:-------:|:------- | -| `:Identity` | ``x`` | ``1`` | ``0`` | No smoothing, the default | -| `:Huber` | ``$(_tex(:cases, "x & $(_tex(:text, " for ")) x ≤ 1", "2$(_tex(:sqrt, "x")) - 1 & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "1 & $(_tex(:text, " for ")) x ≤ 1", "$(_tex(:frac, "1", _tex(:sqrt, "x"))) & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "0 & $(_tex(:text, " for ")) x ≤ 1", "-$(_tex(:frac, "1", "x^{3/2}")) & $(_tex(:text, " for ")) x > 1"))`` | 
| -| `:Tukey` | ``$(_tex(:cases, "$(_tex(:frac, "1", "3")) (1-(1-x)^3) & $(_tex(:text, " for ")) x ≤ 1", "$(_tex(:frac, "1", "3")) & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "(1-s)^2 & $(_tex(:text, " for ")) x ≤ 1", "0 & $(_tex(:text, " for ")) x > 1"))`` | ``$(_tex(:cases, "s-2 & $(_tex(:text, " for ")) x ≤ 1", "0 & $(_tex(:text, " for ")) x > 1"))`` | | +|:------ |:-----:|:-----:|:-----:|:--------- | +| `:Arctan` | ``$(_tex(:rm, "arctan"))(x)`` | ``$(_tex(:displaystyle))$(_tex(:frac, "1", "x^2+1"))`` | ``-$(_tex(:displaystyle))$(_tex(:frac, "2x", "(x^2+1)^2"))`` | | +| `:Cauchy` | ``$(_tex(:log))(1+x)`` | ``$(_tex(:displaystyle))$(_tex(:frac, "1", "1+x"))`` | ``-$(_tex(:displaystyle))$(_tex(:frac, "1", "(1+x)^2"))`` | | +| `:Huber` | ``$(_tex(:cases, "x, & x ≤ 1,", "2$(_tex(:sqrt, "x")) - 1, & x > 1."))`` | ``$(_tex(:cases, "1, & x ≤ 1,", "$(_tex(:frac, "1", _tex(:sqrt, "x"))), & x > 1."))`` | ``$(_tex(:cases, "0, & x ≤ 1,", "-$(_tex(:frac, "1", "x^{3/2}")), & x > 1."))`` | | +| `:Identity` | ``x`` | ``1`` | ``0`` | default (no smoothing) | +| `:SoftL1` | ``2$(_tex(:sqrt, "1+x")) - 1`` | ``$(_tex(:displaystyle))$(_tex(:frac, "1", _tex(:sqrt, "1+x")))`` | ``-$(_tex(:displaystyle))$(_tex(:frac, "1", "2(1+x)^{3/2}"))`` | | +| `:Tukey` | ``$(_tex(:cases, "$(_tex(:frac, "1", "3"))-$(_tex(:frac, "1", "3"))(1-x)^3, & x ≤ 1,", "$(_tex(:frac, "1", "3")), & x > 1."))`` | ``$(_tex(:cases, "(1-x)^2, & x ≤ 1,", "0, & x > 1."))`` | ``$(_tex(:cases, "x-2, & x ≤ 1,", "0, & x > 1."))`` | | Note that in the implementation the second derivative follows the general scheme of hessians and actually implements s''(x)[X] = s''(x)X``. 
# Library of smoothing functions, each returning the triple
# (s, s', s'') for one smoothing symbol, dispatched on `Val(symbol)`.
# The first argument `E` is the (vector-space) manifold the smoothing
# acts on; it is unused in these closed-form expressions.
# Following the general scheme for Hessians, the third function
# implements the action s''(x)[X] = s''(x)X, i.e. it multiplies by `X`.
function _smoothing_factory(::Val{:Arctan})
    # s(x) = arctan(x), s'(x) = 1/(x^2+1), s''(x) = -2x/(x^2+1)^2
    return (E, x) -> atan(x),
    (E, x) -> 1 / (x^2 + 1),
    (E, x, X) -> -2 * x / (x^2 + 1)^2 * X
end
function _smoothing_factory(::Val{:Cauchy})
    # s(x) = log(1+x), s'(x) = 1/(1+x), s''(x) = -1/(1+x)^2
    return (E, x) -> log(1 + x),
    (E, x) -> 1 / (1 + x),
    (E, x, X) -> -1 / (1 + x)^2 * X
end
function _smoothing_factory(::Val{:Huber})
    # s(x) = x for x ≤ 1, 2√x - 1 otherwise; piecewise derivatives accordingly
    return (E, x) -> x <= 1 ? x : 2 * sqrt(x) - 1,
    (E, x) -> x <= 1 ? 1 : 1 / sqrt(x),
    (E, x, X) -> (x <= 1 ? 0 : -1 / (2x^(3 / 2))) * X
end
# Default
function _smoothing_factory(::Val{:Identity})
    # No smoothing: s(x) = x, s'(x) = 1, s''(x)[X] = 0
    return (E, x) -> x, (E, x) -> one(x), (E, x, X) -> zero(X)
end
function _smoothing_factory(::Val{:SoftL1})
    # s(x) = 2√(1+x) - 1, s'(x) = 1/√(1+x), s''(x) = -1/(2(1+x)^{3/2})
    return (E, x) -> 2 * sqrt(1 + x) - 1,
    (E, x) -> 1 / sqrt(1 + x),
    (E, x, X) -> -1 / (2 * (1 + x)^(3 / 2)) * X
end
function _smoothing_factory(::Val{:Tukey})
    # s(x) = (1-(1-x)^3)/3 capped at 1/3; s'(x) = (1-x)^2, s''(x) = x-2 for x ≤ 1
    return (E, x) -> x <= 1 ? 1 / 3 * (1 - (1 - x)^3) : 1 / 3,
    (E, x) -> x <= 1 ? (1 - x)^2 : 0,
    (E, x, X) -> (x <= 1 ? x - 2 : 0) * X
end
$(_var(:Keyword, :retraction_method)) -* `smoothing`: specify a smoothing function ``s`` for all ``s_i=s`` as an [`ManifoldHessianObjective`](@ref) or a vector of smoothing functions ``(s_1,…s_n)`` together as a [`VectorHessianFunction`](@ref). +* `smoothing=:Identity`: specify a smoothing function ``s`` for all ``s_i=s`` as an [`ManifoldHessianObjective`](@ref) or a vector of smoothing functions ``(s_1,…s_n)`` together as a [`VectorHessianFunction`](@ref). + You can also generate them using certain symbols and parameters. For all available options see the [`smoothing_factory`](@ref). $(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(200)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-12)`")) $(_note(:OtherKeywords)) From 85c1a64c5e53481ea6ac1952a01ad2c22f457949 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 13:55:02 +0100 Subject: [PATCH 20/34] Add a test for the new factory. --- .../test_nonlinear_least_squares_plan.jl | 25 +++++++++++++++++++ test/runtests.jl | 1 + 2 files changed, 26 insertions(+) create mode 100644 test/plans/test_nonlinear_least_squares_plan.jl diff --git a/test/plans/test_nonlinear_least_squares_plan.jl b/test/plans/test_nonlinear_least_squares_plan.jl new file mode 100644 index 0000000000..9fa5bec20e --- /dev/null +++ b/test/plans/test_nonlinear_least_squares_plan.jl @@ -0,0 +1,25 @@ +using Manifolds, Manopt, Test + +@testset "Nonlinear lest squares plane" begin + @testset "Test cost/residual/jacobian cases with smoothing" begin end + @testset "Smootthing factory" begin + s1 = Manopt.smoothing_factory(:Identity) + @test s1 isa ManifoldHessianObjective + + s2 = Manopt.smoothing_factory((:Identity, 2)) + @test s2 isa VectorHessianFunction + @test length(s2) == 2 + + s3 = Manopt.smoothing_factory((:Identity, 3.0)) + @test s3 isa ManifoldHessianObjective + + for s in [:Arctan, :Cauchy, :Huber, :SoftL1, :Tukey] + s4 = Manopt.smoothing_factory(s) + @test s4 isa ManifoldHessianObjective + end + 
+ s5 = Manopt.smoothing_factory(((:Identity, 2), (:Huber, 3))) + @test s5 isa VectorHessianFunction + @test length(s5) == 5 + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 3d90036bcb..80947d9fa1 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -16,6 +16,7 @@ include("utils/example_tasks.jl") include("plans/test_conjugate_residual_plan.jl") include("plans/test_interior_point_newton_plan.jl") include("plans/test_nelder_mead_plan.jl") + include("plans/test_nonlinear_least_squares_plan.jl") include("plans/test_gradient_plan.jl") include("plans/test_constrained_plan.jl") include("plans/test_hessian_plan.jl") From 2002ef584ae50b1534452cb210935e3a08cd8a44 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 15:20:13 +0100 Subject: [PATCH 21/34] Test the new variants. --- src/plans/nonlinear_least_squares_plan.jl | 35 ++--- src/plans/vectorial_plan.jl | 29 ++-- .../test_nonlinear_least_squares_plan.jl | 133 +++++++++++++++++- 3 files changed, 165 insertions(+), 32 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index c2c9540973..4cf927a92e 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -95,8 +95,11 @@ function get_cost( ) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} v = 0.0 for i in 1:length(nlso.objective) - v += get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2) + v += get_cost( + vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, i))^2 + ) end + v *= 0.5 return v end function get_cost( @@ -108,8 +111,8 @@ function get_cost( vector_space=Rn, value_cache=get_value(M, nlso.objective, p), ) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} - return sum( - get_cost(vector_space(1), nlso.smoothing, value_cache[i]) for + return 0.5 * sum( + get_cost(vector_space(1), nlso.smoothing, abs(value_cache[i])^2) for i in 
1:length(value_cache) ) end @@ -125,8 +128,11 @@ function get_cost( ) where {E<:AbstractEvaluationType} v = 0.0 for i in 1:length(nlso.objective) - v += get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2, i) + v += get_value( + vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, i))^2, i + ) end + v *= 0.5 return v end function get_cost( @@ -138,8 +144,8 @@ function get_cost( vector_space=Rn, value_cache=get_value(M, nlso.objective, p), ) where {E<:AbstractEvaluationType} - return sum( - get_cost(vector_space(1), nlso.smoothing, value_cache[i], i) for + return 0.5 * sum( + get_value(vector_space(1), nlso.smoothing, abs(value_cache[i])^2, i) for i in 1:length(value_cache) ) end @@ -191,6 +197,7 @@ function get_jacobian!( J, nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractVectorGradientFunction}, p; + vector_space=Rn, basis::AbstractBasis=get_basis(nlso.objective.jacobian_type), value_cache=get_value(M, nlso.objective, p), ) where {E,AHVF} @@ -226,7 +233,7 @@ _doc_get_residuals_nlso = """ get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p) get_residuals!(M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective, p) -Compute the vector of residuals ``s_i(f_i(p))``, ``i=1,…,n`` given the manifold `M`, +Compute the vector of residuals ``f_i(p)``, ``i=1,…,n`` given the manifold `M`, the [`NonlinearLeastSquaresObjective`](@ref) `nlso` and a current point ``p`` on `M`. 
# Keyword arguments @@ -258,11 +265,10 @@ function get_residuals!( E,<:AbstractVectorFunction{E,<:ComponentVectorialType},H }, p; - vector_space=Rn, kwargs..., ) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} for i in 1:length(nlso.objective) - V[i] = get_cost(vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2) + V[i] = get_value(M, nlso.objective, p, i) end return V end @@ -273,11 +279,10 @@ function get_residuals!( E,<:AbstractVectorFunction{E,<:FunctionVectorialType},H }, p; - vector_space=Rn, value_cache=get_value(M, nlso.objective, p), ) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} for i in 1:length(value_cache) - V[i] = get_cost(vector_space(1), nlso.smoothing, value_cache[i]) + V[i] = value_cache[i] end return V end @@ -289,13 +294,10 @@ function get_residuals!( E,<:AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction }, p; - vector_space=Rn, kwargs..., ) where {E<:AbstractEvaluationType} for i in 1:length(nlso.objective) - V[i] = get_cost( - vector_space(1), nlso.smoothing, get_value(nlso.objective, p, i)^2, i - ) + V[i] = get_value(M, nlso.objective, p, i) end return V end @@ -306,11 +308,10 @@ function get_residuals!( E,<:AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction }, p; - vector_space=Rn, value_cache=get_value(M, nlso.objective, p), ) where {E<:AbstractEvaluationType} for i in 1:length(value_cache) - V[i] = get_cost(vector_space(1), nlso.smoothing, value_cache[i], i) + V[i] = value_cache[i] end return V end diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 95689d6abc..7168d96d4f 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -21,11 +21,14 @@ for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ T_p\mathcal # Fields * `basis` an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) to indicate the default representation. 
+ +# Constructor + CoordinateVectorialType(basis=DefaultOrthonormalBasis()) """ struct CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType basis::B end - +CoordinateVectorialType() = CoordinateVectorialType(DefaultOrthonormalBasis()) """ get_basis(::AbstractVectorialType) @@ -641,7 +644,7 @@ function get_jacobian( n = vgf.range_dimension d = manifold_dimension(M, p) mP = PowerManifold(M, range, vgf.range_dimension) - gradients = zero_vector(mP, fill(p, pM)) + gradients = zero_vector(mP, fill(p, mP)) vgf.jacobian!!(M, gradients, p) # generate the first row to get an eltype c1 = get_coordinates(M, p, gradients[mP, 1], basis) @@ -711,10 +714,8 @@ function get_jacobian!( n = vgf.range_dimension gradients = vgf.jacobian!!(M, p) mP = PowerManifold(M, range, vgf.range_dimension) - println("range ", range, "mP", mP) for i in 1:n c = @view J[i, :] - println("G ", gradients) get_coordinates!(M, c, p, gradients[mP, i], basis) end return J @@ -748,15 +749,21 @@ end # Part II: mutating vgf – allocating jacobian # (a) We have a single gradient function function get_jacobian!( - M::AbstractManifold, JF, vgf::VGF, p; basis::B=DefaultOrthonormalBasis() + M::AbstractManifold, + JF, + vgf::VGF, + p; + basis::B=DefaultOrthonormalBasis(), + range::AbstractPowerRepresentation=get_range(vgf.jacobian_type), ) where { FT, VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:FunctionVectorialType}, B<:AbstractBasis, } - gradients = zero_vector(mP, fill(p, pM)) + mP = PowerManifold(M, range, vgf.range_dimension) + gradients = zero_vector(mP, fill(p, mP)) vgf.jacobian!!(M, gradients, p) - for i in 1:n + for i in 1:(vgf.range_dimension) JF[i, :] .= get_coordinates(M, p, gradients[mP, i], basis) end return JF @@ -772,7 +779,7 @@ function get_jacobian!( ) where { FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:ComponentVectorialType} } - for i in 1:n + for i in 1:(vgf.range_dimension) vgf.jacobian!![i](M, X, p) JF[i, :] .= get_coordinates(M, p, X, basis) 
end @@ -847,8 +854,7 @@ function get_gradient( ) n = _vgf_index_to_length(i, vgf.range_dimension) pM = PowerManifold(M, range, n) - P = fill(p, pM) - X = zero_vector(pM, P) + X = zero_vector(pM, fill(p, pM)) return get_gradient!(M, X, vgf, p, i, range) end # (c) Special cases where allocations can be skipped @@ -994,8 +1000,7 @@ function get_gradient!( ) where {FT<:AbstractVectorialType} # a type wise safe way to allocate what usually should yield a n-times-d matrix pM = PowerManifold(M, range, vgf.range_dimension...) - P = fill(p, pM) - Y = zero_vector(pM, P) + Y = zero_vector(pM, fill(p, pM)) JF = reshape( get_coordinates(pM, fill(p, pM), Y, vgf.jacobian_type.basis), power_dimensions(pM)..., diff --git a/test/plans/test_nonlinear_least_squares_plan.jl b/test/plans/test_nonlinear_least_squares_plan.jl index 9fa5bec20e..b6999c87d2 100644 --- a/test/plans/test_nonlinear_least_squares_plan.jl +++ b/test/plans/test_nonlinear_least_squares_plan.jl @@ -1,14 +1,141 @@ using Manifolds, Manopt, Test -@testset "Nonlinear lest squares plane" begin - @testset "Test cost/residual/jacobian cases with smoothing" begin end +@testset "Nonlinear lest squares plan" begin + @testset "Test cost/residual/jacobian cases with smoothing" begin + # a simple nlso objetive on R2 + M = Euclidean(2) + d1 = [1, 0] + d2 = [0, 1] + f1(M, x) = norm(x - d1) + f2(M, x) = norm(x - d2) + f(M, x) = [f1(M, x), f2(M, x)] + # Components + f!(M, V, x) = (V .= [f1(M, x), f2(M, x)]) + j1(M, x) = (x - d1) / norm(x - d1) + j1!(M, X, x) = (X .= (x - d1) / norm(x - d1)) + j2(M, x) = (x - d2) / norm(x - d2) + j2!(M, X, x) = (X .= (x - d2) / norm(x - d2)) + # Function + JF(M, x) = [j1(M, x), j2(M, x)] + JF!(M, JF, x) = (JF .= [j1(M, x), j2(M, x)]) + # Jacobi matrix + J(M, x) = cat(j1(M, x), j2(M, x); dims=2) + J!(M, J, x) = (J .= cat(j1(M, x), j2(M, x); dims=2)) + # Smoothing types + s1 = Manopt.smoothing_factory(:Identity) + s2 = Manopt.smoothing_factory((:Identity, 2)) + + # Test all (new) possible combinations 
of vectorial cost and Jacobian + # (1) [F]unction (Gradient), [C]omponent (Gradients), [J] Coordinate (Jacobian in Basis) + # (2) [a]llocating [i] inplace + # (3) [s] single smoothing [v] vector smoothing + nlsoFas = NonlinearLeastSquaresObjective( + f, JF, 2; jacobian_type=FunctionVectorialType(), smoothing=s1 + ) + nlsoFav = NonlinearLeastSquaresObjective( + f, JF, 2; jacobian_type=FunctionVectorialType(), smoothing=s2 + ) + nlsoFis = NonlinearLeastSquaresObjective( + f!, + JF!, + 2; + evaluation=InplaceEvaluation(), + jacobian_type=FunctionVectorialType(), + smoothing=s1, + ) + nlsoFiv = NonlinearLeastSquaresObjective( + f!, + JF!, + 2; + evaluation=InplaceEvaluation(), + jacobian_type=FunctionVectorialType(), + smoothing=s2, + ) + + nlsoCas = NonlinearLeastSquaresObjective( + [f1, f2], + [j1, j2], + 2; + function_type=ComponentVectorialType(), + jacobian_type=ComponentVectorialType(), + smoothing=s1, + ) + nlsoCav = NonlinearLeastSquaresObjective( + [f1, f2], + [j1, j2], + 2; + function_type=ComponentVectorialType(), + jacobian_type=ComponentVectorialType(), + smoothing=s2, + ) + nlsoCis = NonlinearLeastSquaresObjective( + [f1, f2], + [j1!, j2!], + 2; + function_type=ComponentVectorialType(), + jacobian_type=ComponentVectorialType(), + evaluation=InplaceEvaluation(), + smoothing=s1, + ) + nlsoCiv = NonlinearLeastSquaresObjective( + [f1, f2], + [j1!, j2!], + 2; + function_type=ComponentVectorialType(), + jacobian_type=ComponentVectorialType(), + evaluation=InplaceEvaluation(), + smoothing=s2, + ) + + nlsoJas = NonlinearLeastSquaresObjective(f, J, 2; smoothing=s1) + nlsoJav = NonlinearLeastSquaresObjective(f, J, 2; smoothing=s2) + nlsoJis = NonlinearLeastSquaresObjective( + f!, J!, 2; evaluation=InplaceEvaluation(), smoothing=s1 + ) + nlsoJiv = NonlinearLeastSquaresObjective( + f!, J!, 2; evaluation=InplaceEvaluation(), smoothing=s2 + ) + + p = [0.5, 0.5] + V = [0.0, 0.0] + Vt = [1 / sqrt(2), 1 / sqrt(2)] + G = zeros(2, 2) + Gt = 1 / sqrt(2) .* [-1.0 1.0; 1.0 
-1.0] + for nlso in [ + nlsoFas, + nlsoFav, + nlsoFis, + nlsoFiv, + nlsoCas, + nlsoCav, + nlsoCis, + nlsoCiv, + nlsoJas, + nlsoJav, + nlsoJis, + nlsoJiv, + ] + c = get_cost(M, nlso, p) + @test c ≈ 0.5 + fill!(V, 0.0) + get_residuals!(M, V, nlso, p) + @test V == get_residuals(M, nlso, p) + @test V ≈ Vt + @test 0.5 * sum(abs.(V) .^ 2) ≈ c + fill!(G, 0.0) + get_jacobian!(M, G, nlso, p) + @test G == get_jacobian(M, nlso, p) + @test G == Gt + end + end @testset "Smootthing factory" begin s1 = Manopt.smoothing_factory(:Identity) @test s1 isa ManifoldHessianObjective - + @test Manopt.smoothing_factory(s1) === s1 # Passthrough for mhos s2 = Manopt.smoothing_factory((:Identity, 2)) @test s2 isa VectorHessianFunction @test length(s2) == 2 + @test Manopt.smoothing_factory(s2) === s2 # Passthrough for vhfs s3 = Manopt.smoothing_factory((:Identity, 3.0)) @test s3 isa ManifoldHessianObjective From e4be25526cea106d38774a0664e72246c4c5c3ae Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 16:00:22 +0100 Subject: [PATCH 22/34] Refine factory. --- src/plans/nonlinear_least_squares_plan.jl | 56 +++++++------------ .../test_nonlinear_least_squares_plan.jl | 13 ++++- 2 files changed, 30 insertions(+), 39 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 4cf927a92e..4df62d3537 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -150,24 +150,6 @@ function get_cost( ) end -function get_jacobian( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, p; kwargs... -) where {mT} - nlso = get_objective(dmp) - M = get_manifold(dmp) - J = zeros(length(nlso.objective), manifold_dimension(M)) - get_jacobian!(M, J, nlso, p; kwargs...) - return J -end -function get_jacobian!( - dmp::DefaultManoptProblem{mT,<:NonlinearLeastSquaresObjective}, J, p; kwargs... 
-) where {mT} - nlso = get_objective(dmp) - M = get_manifold(dmp) - get_jacobian!(M, J, nlso, p; kwargs...) - return J -end - function get_jacobian( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) @@ -446,6 +428,7 @@ mutable struct LevenbergMarquardtState{ end end +function smoothing_factory end """ smoothing_factory(s::Symbol=:Identity) smoothing_factory((s,α)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Real}) @@ -489,7 +472,7 @@ containing all smoothing functions with their repetitions mentioned Note that in the implementation the second derivative follows the general scheme of hessians and actually implements s''(x)[X] = s''(x)X``. """ -function smoothing_factory(s) end +smoothing_factory(s) smoothing_factory() = smoothing_factory(:Identity) smoothing_factory(o::ManifoldHessianObjective) = o @@ -497,6 +480,9 @@ smoothing_factory(o::VectorHessianFunction) = o function smoothing_factory(s::Symbol) return ManifoldHessianObjective(_smoothing_factory(Val(s))...) end +function _smoothing_factory(s::Symbol) + return _smoothing_factory(Val(s)) +end function smoothing_factory((s, α)::Tuple{Symbol,<:Real}) s, s_p, s_pp = _smoothing_factory(s, α) return ManifoldHessianObjective(s, s_p, s_pp) @@ -529,28 +515,22 @@ function smoothing_factory((o, k)::Tuple{ManifoldHessianObjective,<:Int}) hessian_type=ComponentVectorialType(), ) end -function smoothing_factory( - S::NTuple{ - n, - <:Union{ - Symbol, - ManifoldHessianObjective, - Tuple{Symbol,<:Int}, - Tuple{Symbol,<:Real}, - Tuple{ManifoldHessianObjective,<:Int}, - Tuple{ManifoldHessianObjective,<:Real}, - }, - } where {n}, -) +function smoothing_factory(S...) s = Function[] s_p = Function[] s_pp = Function[] # collect all functions including their copies into a large vector for t in S - _s, _s_p, _s_pp = _smoothing_factory(t...) - push!(s, _s...) - push!(s_p, _s_p...) - push!(s_pp, _s_pp...) + _s, _s_p, _s_pp = t isa Tuple ? _smoothing_factory(t...) 
: _smoothing_factory(t) + if _s isa Array + push!(s, _s...) + push!(s_p, _s_p...) + push!(s_pp, _s_pp...) + else + push!(s, _s) + push!(s_p, _s_p) + push!(s_pp, _s_pp) + end end k = length(s) return VectorHessianFunction( @@ -569,6 +549,10 @@ function _smoothing_factory(o::ManifoldHessianObjective) (E, x) -> get_gradient(E, o, x), (E, x, X) -> get_hessian(E, o, x, X) end +function _smoothing_factory(o::VectorHessianFunction) + return o.value!!, o.jacobian!!, o.hessians!! +end + function _smoothing_factory(o::ManifoldHessianObjective, α::Real) return (E, x) -> α^2 * get_cost(E, o, x / α^2), (E, x) -> get_gradient(E, o, x / α^2), diff --git a/test/plans/test_nonlinear_least_squares_plan.jl b/test/plans/test_nonlinear_least_squares_plan.jl index b6999c87d2..b317911740 100644 --- a/test/plans/test_nonlinear_least_squares_plan.jl +++ b/test/plans/test_nonlinear_least_squares_plan.jl @@ -129,8 +129,14 @@ using Manifolds, Manopt, Test end end @testset "Smootthing factory" begin - s1 = Manopt.smoothing_factory(:Identity) + s1 = Manopt.smoothing_factory() @test s1 isa ManifoldHessianObjective + s1s = Manopt.smoothing_factory((s1, 2.0)) + @test s1s isa ManifoldHessianObjective + s1v = Manopt.smoothing_factory((s1, 3)) + @test s1v isa VectorHessianFunction + @test length(s1v) == 3 + @test Manopt.smoothing_factory(s1) === s1 # Passthrough for mhos s2 = Manopt.smoothing_factory((:Identity, 2)) @test s2 isa VectorHessianFunction @@ -145,8 +151,9 @@ using Manifolds, Manopt, Test @test s4 isa ManifoldHessianObjective end - s5 = Manopt.smoothing_factory(((:Identity, 2), (:Huber, 3))) + # Combine all different types + s5 = Manopt.smoothing_factory((:Identity, 2), (:Huber, 3), s1, :Tukey, s2) @test s5 isa VectorHessianFunction - @test length(s5) == 5 + @test length(s5) == 9 end end From 2261efb2b4f8484bc1bf1480b09e16a8a58f3c92 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 16:36:20 +0100 Subject: [PATCH 23/34] Finish testing. 
--- src/plans/nonlinear_least_squares_plan.jl | 7 +- src/plans/vectorial_plan.jl | 67 +++++++++---------- .../test_nonlinear_least_squares_plan.jl | 23 ++++++- 3 files changed, 58 insertions(+), 39 deletions(-) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 4df62d3537..594e33293d 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -433,8 +433,9 @@ function smoothing_factory end smoothing_factory(s::Symbol=:Identity) smoothing_factory((s,α)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Real}) smoothing_factory((s,k)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Int}) - smoothing_factory(S::NTuple{n, <:Union{Symbol, Tuple{S, Int} S<: Tuple{Symbol, <:Real}}} where n) smoothing_factory(o::ManifoldHessianObjective) + smoothing_factory(o::VectorHessianFunction) + smoothing_factory(s...) Create a smoothing function from a symbol `s`. @@ -447,8 +448,8 @@ which yields ``s_α'(x) = s'$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bi For a tuple `(s, k)`, a [`VectorHessianFunction`](@ref) is returned, where every component is the smooting function indicated by `s` If the argument already is a [`VectorHessianFunction`](@ref), it is returned unchanged. -Finally for a tuple containing the above four cases, a [`VectorHessianFunction`](@ref) is returned, -containing all smoothing functions with their repetitions mentioned +Finally passing a sequence of these different cases as `s...`, a [`VectorHessianFunction`](@ref) is returned, +containing all smoothing functions with their repetitions mentioned as a vectorial function. 
# Examples diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 7168d96d4f..90a2ba9302 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -516,7 +516,7 @@ get_hessian_function(vgf::VectorHessianFunction, recursive::Bool=false) = vgf.he # A small helper function to change the basis of a Jacobian """ - _change_basis!(M::AbstractManifold, p, JF, from_basis::B1, to_basis::B; X=zero_vector(M,p)) + _change_basis!(M::AbstractManifold, JF, p, from_basis::B1, to_basis::B; X=zero_vector(M,p)) Given a jacobian matrix `JF` on a manifold `M` at `p` with respect to the `from_basis` in the tangent space of `p` on `M`. Change the basis of the Jacobian to `to_basis` in place of `JF`. @@ -525,7 +525,7 @@ in the tangent space of `p` on `M`. Change the basis of the Jacobian to `to_basi * `X` a temporary vector to store a generated vector, before decomposing it again with respect to the new basis """ function _change_basis!( - M, p, JF, from_basis::B1, to_basis::B2; X=zero_vector(M, p) + M, JF, p, from_basis::B1, to_basis::B2; X=zero_vector(M, p) ) where {B1<:AbstractBasis,B2<:AbstractBasis} # change every row to new basis for i in 1:size(JF, 1) # every row @@ -535,8 +535,11 @@ function _change_basis!( return JF end # case we have the same basis: nothing to do, just return JF -_change_basis!(M, p, JF, from_basis::B, to_basis_new) where {B<:AbstractBasis} = JF - +function _change_basis!( + M, JF, p, from_basis::B, to_basis_new::B; kwargs... +) where {B<:AbstractBasis} + return JF +end _doc_get_jacobian_vgf = """ get_jacobian(M::AbstractManifold, vgf::AbstractVectorGradientFunction, p; kwargs...) get_jacobian(M::AbstractManifold, J, vgf::AbstractVectorGradientFunction, p; kwargs...) @@ -580,13 +583,17 @@ get_jacobian(::AbstractManifold, ::AbstractVectorGradientFunction, p; kwargs...) 
# (a) We have a single gradient function function get_jacobian( M::AbstractManifold, - vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, + vgf::VGF, p; basis::B=get_basis(vgf.jacobian_type), range::AbstractPowerRepresentation=get_range(vgf.jacobian_type), -) where {FT,B<:AbstractBasis} +) where { + FT, + VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:FunctionVectorialType}, + B<:AbstractBasis, +} n = vgf.range_dimension - d = manifold_dimension(M, p) + d = manifold_dimension(M) gradients = vgf.jacobian!!(M, p) mP = PowerManifold(M, range, vgf.range_dimension) # generate the first row to get an eltype @@ -600,14 +607,13 @@ function get_jacobian( end # (b) We have a vector of gradient functions function get_jacobian( - M::AbstractManifold, - vgf::AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, - p; - basis=get_basis(vgf.jacobian_type), - kwargs..., -) where {FT} + M::AbstractManifold, vgf::VGF, p; basis=get_basis(vgf.jacobian_type), kwargs... +) where { + FT, + VGF<:AbstractVectorGradientFunction{<:AllocatingEvaluation,FT,<:ComponentVectorialType}, +} n = vgf.range_dimension - d = manifold_dimension(M, p) + d = manifold_dimension(M) # generate the first row to get an eltype c1 = get_coordinates(M, p, vgf.jacobian!![1](M, p), basis) JF = zeros(eltype(c1), n, d) @@ -619,16 +625,15 @@ function get_jacobian( end # (c) We have a Jacobian function function get_jacobian( - M::AbstractManifold, - vgf::AbstractVectorGradientFunction{ + M::AbstractManifold, vgf::VGF, p; basis=get_basis(vgf.jacobian_type), kwargs... 
+) where { + FT, + VGF<:AbstractVectorGradientFunction{ <:AllocatingEvaluation,FT,<:CoordinateVectorialType }, - p; - basis=get_basis(vgf.jacobian_type), - kwargs..., -) where {FT} +} JF = vgf.jacobian!!(M, p) - _change_basis!(JF, vgf.jacobian_type.basis, basis) + _change_basis!(M, p, JF, vgf.jacobian_type.basis, basis) return JF end @@ -642,7 +647,7 @@ function get_jacobian( range::Union{AbstractPowerRepresentation,Nothing}=get_range(vgf.jacobian_type), ) where {FT,B<:AbstractBasis} n = vgf.range_dimension - d = manifold_dimension(M, p) + d = manifold_dimension(M) mP = PowerManifold(M, range, vgf.range_dimension) gradients = zero_vector(mP, fill(p, mP)) vgf.jacobian!!(M, gradients, p) @@ -663,7 +668,7 @@ function get_jacobian( basis=get_basis(vgf.jacobian_type), ) where {FT} n = vgf.range_dimension - d = manifold_dimension(M, p) + d = manifold_dimension(M) # generate the first row to get an eltype X = zero_vector(M, p) vgf.jacobian!![1](M, X, p) @@ -675,9 +680,6 @@ function get_jacobian( JF[i, :] .= get_coordinates(M, p, X, basis) end return JF - JF = vgf.jacobian!!(M, p) - _change_basis!(JF, vgf.jacobian_type.basis, basis) - return JF end # (c) We have a Jacobian function function get_jacobian( @@ -686,10 +688,10 @@ function get_jacobian( p; basis=get_basis(vgf.jacobian_type), ) where {FT} - c = get_coordinats(M, p, zero_vector(M, p)) - JF = zeros(eltype(c), vgf.range_dimension, manifold_dimension(M, p)) + c = get_coordinates(M, p, zero_vector(M, p)) + JF = zeros(eltype(c), vgf.range_dimension, manifold_dimension(M)) vgf.jacobian!!(M, JF, p) - _change_basis!(JF, vgf.jacobian_type.basis, basis) + _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis) return JF end @@ -742,7 +744,7 @@ function get_jacobian!( }, } JF .= vgf.jacobian!!(M, p) - _change_basis!(M, p, JF, vgf.jacobian_type.basis, basis) + _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis) return JF end @@ -784,9 +786,6 @@ function get_jacobian!( JF[i, :] .= get_coordinates(M, p, X, basis) end 
return JF - JF = vgf.jacobian!!(M, p) - _change_basis!(JF, vgf.jacobian_type.basis, basis) - return JF end # (c) We have a Jacobian function function get_jacobian!( @@ -795,7 +794,7 @@ function get_jacobian!( FT,VGF<:AbstractVectorGradientFunction{<:InplaceEvaluation,FT,<:CoordinateVectorialType} } vgf.jacobian!!(M, JF, p) - _change_basis!(M, p, JF, vgf.jacobian_type.basis, basis) + _change_basis!(M, JF, p, vgf.jacobian_type.basis, basis) return JF end diff --git a/test/plans/test_nonlinear_least_squares_plan.jl b/test/plans/test_nonlinear_least_squares_plan.jl index b317911740..2ba260c29a 100644 --- a/test/plans/test_nonlinear_least_squares_plan.jl +++ b/test/plans/test_nonlinear_least_squares_plan.jl @@ -87,8 +87,12 @@ using Manifolds, Manopt, Test smoothing=s2, ) - nlsoJas = NonlinearLeastSquaresObjective(f, J, 2; smoothing=s1) - nlsoJav = NonlinearLeastSquaresObjective(f, J, 2; smoothing=s2) + nlsoJas = NonlinearLeastSquaresObjective( + f, J, 2; jacobian_type=CoordinateVectorialType(), smoothing=s1 + ) + nlsoJav = NonlinearLeastSquaresObjective( + f, J, 2; jacobian_type=CoordinateVectorialType(), smoothing=s2 + ) nlsoJis = NonlinearLeastSquaresObjective( f!, J!, 2; evaluation=InplaceEvaluation(), smoothing=s1 ) @@ -126,8 +130,23 @@ using Manifolds, Manopt, Test get_jacobian!(M, G, nlso, p) @test G == get_jacobian(M, nlso, p) @test G == Gt + # since s1/s2 are the identity we can also always check agains the allocating + # jacobian of the objective + G2 = get_jacobian(M, nlso.objective, p) + @test G2 == Gt end end + @testset "Test Change of basis" begin + J = ones(2, 2) + Jt = ones(2, 2) + M = Euclidean(2) + p = [0.5, 0.5] + B1 = DefaultBasis() + B2 = DefaultOrthonormalBasis() + Manopt._change_basis!(M, J, p, B1, B2) + # In practice both are the same basis in coordinates, so Jtt stays as iss + @test J == Jt + end @testset "Smootthing factory" begin s1 = Manopt.smoothing_factory() @test s1 isa ManifoldHessianObjective From 8dc437ffac9b58bac6b83055ed26ceda04ad4287 
Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 17:10:40 +0100 Subject: [PATCH 24/34] add the remaining tests. --- src/plans/vectorial_plan.jl | 6 +----- test/plans/test_vectorial_plan.jl | 2 ++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 90a2ba9302..c35729cd24 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -1164,11 +1164,7 @@ function get_value!( i=:, ) c = vgf.value!!(M, p) - if isa(c, Number) - V .= c - else - V .= c[i] - end + V .= c[i] return V end diff --git a/test/plans/test_vectorial_plan.jl b/test/plans/test_vectorial_plan.jl index 0f3b25a1ef..745ad25982 100644 --- a/test/plans/test_vectorial_plan.jl +++ b/test/plans/test_vectorial_plan.jl @@ -57,6 +57,8 @@ using Manopt: get_value, get_value_function, get_gradient_function jacobian_type=CoordinateVectorialType(DefaultOrthonormalBasis()), evaluation=InplaceEvaluation(), ) + @test Manopt.get_jacobian_basis(vgf_ji) == vgf_ji.jacobian_type.basis + @test Manopt.get_jacobian_basis(vgf_vi) == DefaultOrthonormalBasis() p = [1.0, 2.0, 3.0] c = [0.0, -3.0] gg = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] From 8b9cbcbafd46ee06848921e59fda5e7b3ea852c4 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 17:31:16 +0100 Subject: [PATCH 25/34] Add changelog entry. 
--- Changelog.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Changelog.md b/Changelog.md index 4be3383e6f..299ebc2bc0 100644 --- a/Changelog.md +++ b/Changelog.md @@ -9,13 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -* the Levenberg-Marquardt algorithm internally uses a `VectorGradientFunction`, -which males a “gradient matrix” and a vector of gradient functions available additionally to -a Jacobian, that requires bases for tangent spaces +* the Levenberg-Marquardt algorithm internally uses a `VectorGradientFunction`, which allows + to use a vector of gradients of a function returning all gradients as wel for the algorithm +* smoothing is now available for the Levenberg-Marquardt algorithm + ### Changed * Minimum Julia version is now 1.10 (the LTS which replaced 1.6) +* The vectorial functions had a bug where the original vector function for the mutating case + was not always treated as mutating. ### Removed From 304426a72cc087f7389da727250b8c1a27d8221f Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Mon, 30 Dec 2024 18:06:10 +0100 Subject: [PATCH 26/34] Fix a final function. 
--- src/plans/vectorial_plan.jl | 9 ++++----- test/plans/test_vectorial_plan.jl | 10 ++++++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index c35729cd24..839a83a570 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -798,12 +798,11 @@ function get_jacobian!( return JF end -get_jacobian_basis(vgf::AbstractVectorGradientFunction) = DefaultOrthonormalBasis() -function get_jacobian_basis( - vgf::AbstractVectorGradientFunction{F,G,<:CoordinateVectorialType} -) where {F,G} - return vgf.jacobian_type.basis +function get_jacobian_basis(vgf::AbstractVectorGradientFunction) + return _get_jacobian_basis(vgf.jacobian_type) end +_get_jacobian_basis(jt::AbstractVectorialType) = DefaultOrthonormalBasis() +_get_jacobian_basis(jt::CoordinateVectorialType) = jt.basis # # diff --git a/test/plans/test_vectorial_plan.jl b/test/plans/test_vectorial_plan.jl index 745ad25982..c136dd2f70 100644 --- a/test/plans/test_vectorial_plan.jl +++ b/test/plans/test_vectorial_plan.jl @@ -59,6 +59,16 @@ using Manopt: get_value, get_value_function, get_gradient_function ) @test Manopt.get_jacobian_basis(vgf_ji) == vgf_ji.jacobian_type.basis @test Manopt.get_jacobian_basis(vgf_vi) == DefaultOrthonormalBasis() + vgf_jib = VectorGradientFunction( + g!, + jac_g!, + 2; + jacobian_type=CoordinateVectorialType(DefaultBasis()), + evaluation=InplaceEvaluation(), + ) + @test Manopt.get_jacobian_basis(vgf_ji) == vgf_ji.jacobian_type.basis + @test Manopt.get_jacobian_basis(vgf_jib) == DefaultBasis() + @test Manopt.get_jacobian_basis(vgf_vi) == DefaultOrthonormalBasis() p = [1.0, 2.0, 3.0] c = [0.0, -3.0] gg = [[1.0, 0.0, 0.0], [0.0, -1.0, 0.0]] From b903316b74c2fbd6da9ac14c5d56648264876627 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Wed, 1 Jan 2025 16:58:57 +0100 Subject: [PATCH 27/34] Remove smoothing. 
--- Changelog.md | 3 +- docs/src/solvers/LevenbergMarquardt.md | 6 - src/Manopt.jl | 1 - src/documentation_glossary.jl | 4 +- src/plans/nonlinear_least_squares_plan.jl | 324 +----------------- src/solvers/LevenbergMarquardt.jl | 12 +- .../test_nonlinear_least_squares_plan.jl | 105 +----- 7 files changed, 28 insertions(+), 427 deletions(-) diff --git a/Changelog.md b/Changelog.md index 299ebc2bc0..73191c629a 100644 --- a/Changelog.md +++ b/Changelog.md @@ -11,8 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 * the Levenberg-Marquardt algorithm internally uses a `VectorGradientFunction`, which allows to use a vector of gradients of a function returning all gradients as wel for the algorithm -* smoothing is now available for the Levenberg-Marquardt algorithm - +* The `VectorGradientFunction` now also have a `get_jacobian` function ### Changed diff --git a/docs/src/solvers/LevenbergMarquardt.md b/docs/src/solvers/LevenbergMarquardt.md index ea998307ed..82d7cd38a5 100644 --- a/docs/src/solvers/LevenbergMarquardt.md +++ b/docs/src/solvers/LevenbergMarquardt.md @@ -15,12 +15,6 @@ LevenbergMarquardt! LevenbergMarquardtState ``` -## Available smoothing functions - -```@docs -Manopt.smoothing_factory -``` - ## [Technical details](@id sec-lm-technical-details) The [`LevenbergMarquardt`](@ref) solver requires the following functions of a manifold to be available diff --git a/src/Manopt.jl b/src/Manopt.jl index 793befaa7b..091f0accfc 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -391,7 +391,6 @@ export get_state, forward_operator!, get_objective, get_unconstrained_objective -export smoothing_factory export get_hessian, get_hessian! 
export ApproxHessianFiniteDifference export is_state_decorator, dispatch_state_decorator diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index c6b90244c0..1e030a4c94 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -268,13 +268,11 @@ define!( :NonLinearLeastSquares, (; M="M", p="p") -> """ ```math -$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) $(_tex(:sum))_{i=1}^n s_i$(_tex(:bigl))($(_tex(:bigl))( $(_tex(:abs, "f_i($p)"))^2 $(_tex(:bigr))) +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) $(_tex(:sum))_{i=1}^n $(_tex(:abs, "f_i($p)"))^2 ``` where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, and each component function is continuously differentiable. -The function ``s_i: ℝ → ℝ`` can be seen as smoothing or regularisation of the least squares term. -It is assumed to be twice continuously differentiable and its default is ``s_i(x) = x`` for all ``i=1,…n``. """, ) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 594e33293d..5f94e2fb08 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -11,17 +11,14 @@ Specify a nonlinear least squares problem # Fields * `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` -* `smoothing`: a [`ManifoldHessianObjective`](@ref) or a [`Vector of a smoothing function ``s: ℝ → ℝ``, hence including its first and second derivatives ``s'`` and ``s''``. This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` as the (inner) `objective`. -The smoothing is expected to be be of [`AllocatingEvaluation`](@ref) type, -since it works on a one-dimensional vector space ``ℝ`` or each of its components does. 
# Constructors NonlinearLeastSquaresObjective(f, jacobian, range_dimension; kwargs...) - NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction, smoothing = :Identity) + NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction) # Arguments @@ -30,7 +27,6 @@ since it works on a one-dimensional vector space ``ℝ`` or each of its componen * `range_dimension` the number of dimensions `m` the function `f` maps into These three can also be passed as a [`AbstractVectorGradientFunction`](@ref) `vf` already. -For this second constructor, the `smoothing=` keyword is the second positional argument. # Keyword arguments @@ -38,20 +34,15 @@ $(_var(:Keyword, :evaluation)) * `function_type::`[`AbstractVectorialType`](@ref)`=`[`FunctionVectorialType`](@ref)`()`: format the * `jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis()`; shortcut to specify the basis the Jacobian matrix is build with. * `jacobian_type::`[`AbstractVectorialType`](@ref)`=`[`CoordinateVectorialType`](@ref)`(jacobian_tangent_basis)`: specify the format the Jacobian is given in. By default a matrix of the differential with respect to a certain basis of the tangent space. -* `smoothing::Union{`[`ManifoldHessianObjective`](@ref),[`VectorHessianFunction`](@ref)`,Symbol}=:Identity`: specify the type of smoothing, - either applied to all components, a vector for each component or a symbol passed to the [`smoothing_factory`](@ref) of predefined smoothing types. 
# See also [`LevenbergMarquardt`](@ref), [`LevenbergMarquardtState`](@ref) """ struct NonlinearLeastSquaresObjective{ - E<:AbstractEvaluationType, - F<:AbstractVectorGradientFunction{E}, - R<:Union{AbstractManifoldHessianObjective,VectorHessianFunction}, + E<:AbstractEvaluationType,F<:AbstractVectorGradientFunction{E} } <: AbstractManifoldGradientObjective{E,F,F} objective::F - smoothing::R end function NonlinearLeastSquaresObjective( @@ -75,62 +66,18 @@ function NonlinearLeastSquaresObjective( return NonlinearLeastSquaresObjective(vgf; kwargs...) end -function NonlinearLeastSquaresObjective( - vgf::F; smoothing=:Identity -) where {F<:AbstractVectorGradientFunction} - s = smoothing_factory(smoothing) - return NonlinearLeastSquaresObjective(vgf, s) -end - # Cost -# (a) with for a single smoothing function function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:ComponentVectorialType},H + E,<:AbstractVectorFunction{E,<:ComponentVectorialType} }, p; - vector_space=Rn, - kwargs..., -) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} - v = 0.0 - for i in 1:length(nlso.objective) - v += get_cost( - vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, i))^2 - ) - end - v *= 0.5 - return v -end -function get_cost( - M::AbstractManifold, - nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:FunctionVectorialType},H - }, - p; - vector_space=Rn, - value_cache=get_value(M, nlso.objective, p), -) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} - return 0.5 * sum( - get_cost(vector_space(1), nlso.smoothing, abs(value_cache[i])^2) for - i in 1:length(value_cache) - ) -end -# (b) vectorial ρ -function get_cost( - M::AbstractManifold, - nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction - }, - p; - vector_space=Rn, kwargs..., ) where {E<:AbstractEvaluationType} v = 0.0 for i in 
1:length(nlso.objective) - v += get_value( - vector_space(1), nlso.smoothing, abs(get_value(M, nlso.objective, p, i))^2, i - ) + v += abs(get_value(M, nlso.objective, p, i))^2 end v *= 0.5 return v @@ -138,16 +85,12 @@ end function get_cost( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:FunctionVectorialType} }, p; - vector_space=Rn, value_cache=get_value(M, nlso.objective, p), ) where {E<:AbstractEvaluationType} - return 0.5 * sum( - get_value(vector_space(1), nlso.smoothing, abs(value_cache[i])^2, i) for - i in 1:length(value_cache) - ) + return 0.5 * sum(abs(value_cache[i])^2 for i in 1:length(value_cache)) end function get_jacobian( @@ -157,39 +100,13 @@ function get_jacobian( get_jacobian!(M, J, nlso, p; kwargs...) return J end -# Cases: (a) single smoothing function +# The jacobian is now just a pass-through function get_jacobian!( - M::AbstractManifold, - J, - nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractManifoldGradientObjective}, - p; - vector_space=Rn, - value_cache=get_value(M, nlso.objective, p), - kwargs..., -) where {E,AHVF} + M::AbstractManifold, J, nlso::NonlinearLeastSquaresObjective, p; kwargs... +) get_jacobian!(M, J, nlso.objective, p; kwargs...) 
- for i in 1:length(nlso.objective) # s'(f_i(p)) * f_i'(p) - J[i, :] .*= get_gradient(vector_space(1), nlso.smoothing, value_cache[i]) - end - return J -end -# Cases: (b) vectorial smoothing function -function get_jacobian!( - M::AbstractManifold, - J, - nlso::NonlinearLeastSquaresObjective{E,AHVF,<:AbstractVectorGradientFunction}, - p; - vector_space=Rn, - basis::AbstractBasis=get_basis(nlso.objective.jacobian_type), - value_cache=get_value(M, nlso.objective, p), -) where {E,AHVF} - get_jacobian!(M, J, nlso.objective, p; basis=basis) - for i in 1:length(nlso.objective) # s_i'(f_i(p)) * f_i'(p) - J[i, :] .*= get_gradient(vector_space(1), nlso.smoothing, value_cache[i], i) - end return J end - function get_gradient( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... ) @@ -217,19 +134,11 @@ _doc_get_residuals_nlso = """ Compute the vector of residuals ``f_i(p)``, ``i=1,…,n`` given the manifold `M`, the [`NonlinearLeastSquaresObjective`](@ref) `nlso` and a current point ``p`` on `M`. - -# Keyword arguments - -* `vector_space=`[`Rn`](@ref)`: a vector space to use for evaluating the single - smoothing functions ``s_i`` on. -* `value_cache=`[`get_value`](@ref)`(M, nlso.objective, p)`: a cache to provide the - function evaltuation vector of the ``f_i(p)``, ``i=1,…,n`` in. """ @doc "$(_doc_get_residuals_nlso)" get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs...) -# (a) with for a single smoothing function function get_residuals( M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p; kwargs... 
) @@ -244,36 +153,7 @@ function get_residuals!( M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:ComponentVectorialType},H - }, - p; - kwargs..., -) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} - for i in 1:length(nlso.objective) - V[i] = get_value(M, nlso.objective, p, i) - end - return V -end -function get_residuals!( - M::AbstractManifold, - V, - nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:FunctionVectorialType},H - }, - p; - value_cache=get_value(M, nlso.objective, p), -) where {E<:AbstractEvaluationType,H<:AbstractManifoldHessianObjective} - for i in 1:length(value_cache) - V[i] = value_cache[i] - end - return V -end -# (b) vectorial ρ -function get_residuals!( - M::AbstractManifold, - V, - nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:ComponentVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:ComponentVectorialType} }, p; kwargs..., @@ -287,14 +167,11 @@ function get_residuals!( M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective{ - E,<:AbstractVectorFunction{E,<:FunctionVectorialType},<:AbstractVectorFunction + E,<:AbstractVectorFunction{E,<:FunctionVectorialType} }, - p; - value_cache=get_value(M, nlso.objective, p), + p, ) where {E<:AbstractEvaluationType} - for i in 1:length(value_cache) - V[i] = value_cache[i] - end + get_value!(M, V, nlso.objective, p) return V end @@ -428,181 +305,6 @@ mutable struct LevenbergMarquardtState{ end end -function smoothing_factory end -""" - smoothing_factory(s::Symbol=:Identity) - smoothing_factory((s,α)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Real}) - smoothing_factory((s,k)::Tuple{Union{Symbol, ManifoldHessianObjective,<:Int}) - smoothing_factory(o::ManifoldHessianObjective) - smoothing_factory(o::VectorHessianFunction) - smoothing_factory(s...) - -Create a smoothing function from a symbol `s`. 
- -For a single symbol `s`, the corresponding smoothing function is returned as a [`ManifoldHessianObjective`](@ref) -If the argument already is a [`ManifoldHessianObjective`](@ref), it is returned unchanged. - -For a tuple `(s, α)`, the smoothing function is scaled by `α` as ``s_α(x) = α s$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))``, -which yields ``s_α'(x) = s'$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))`` and ``s_α''(x)[X] = $(_tex(:bigl))($(_tex(:frac, "1", "α^2"))$(_tex(:bigr)))s''$(_tex(:bigl))($(_tex(:frac, "x", "α^2"))$(_tex(:bigr)))[X]``. - -For a tuple `(s, k)`, a [`VectorHessianFunction`](@ref) is returned, where every component is the smooting function indicated by `s` -If the argument already is a [`VectorHessianFunction`](@ref), it is returned unchanged. - -Finally passing a sequence of these different cases as `s...`, a [`VectorHessianFunction`](@ref) is returned, -containing all smoothing functions with their repetitions mentioned as a vectorial function. 
- -# Examples - -* `smoothing_factory(:Identity)`: returns the identity function as a single smoothing function -* `smoothing_factory(:Identity, 2)`: returns a [`VectorHessianFunction`](@ref) with two identity functions -* `smoothing_factory(:Identity, 2.0)`: returns a [`VectorHessianFunction`](@ref) with the identity ``s(x) = x`` function scaled to ``s_2(x)`` as described above -* `smoothing_factory(mho, 0.5)`: returns a [`ManifoldHessianObjective`](@ref) with the scaled variant of the given `mho`, for example the one returned in the first example -* `smoothing_factory( ( (:Identity, 2), (:Huber, 3) ))`: returns a [`VectorHessianFunction`](@ref) with 5 components, the first 2 `:Identity` the last 3 `:Huber` - -# Currently available smoothing functions - -| `Symbol` | ``s(x)`` | ``s'(x)`` | ``s''(x)`` | Comment | -|:------ |:-----:|:-----:|:-----:|:--------- | -| `:Arctan` | ``$(_tex(:rm, "arctan"))(x)`` | ``$(_tex(:displaystyle))$(_tex(:frac, "1", "x^2+1"))`` | ``-$(_tex(:displaystyle))$(_tex(:frac, "2x", "(x^2+1)^2"))`` | | -| `:Cauchy` | ``$(_tex(:log))(1+x)`` | ``$(_tex(:displaystyle))$(_tex(:frac, "1", "1+x"))`` | ``-$(_tex(:displaystyle))$(_tex(:frac, "1", "(1+x)^2"))`` | | -| `:Huber` | ``$(_tex(:cases, "x, & x ≤ 1,", "2$(_tex(:sqrt, "x")) - 1, & x > 1."))`` | ``$(_tex(:cases, "1, & x ≤ 1,", "$(_tex(:frac, "1", _tex(:sqrt, "x"))), & x > 1."))`` | ``$(_tex(:cases, "0, & x ≤ 1,", "-$(_tex(:frac, "1", "x^{3/2}")), & x > 1."))`` | | -| `:Identity` | ``x`` | ``1`` | ``0`` | default (no smoothing) | -| `:SoftL1` | ``2$(_tex(:sqrt, "1+x")) - 1`` | ``$(_tex(:displaystyle))$(_tex(:frac, "1", _tex(:sqrt, "1+x")))`` | ``-$(_tex(:displaystyle))$(_tex(:frac, "1", "2(1+x)^{3/2}"))`` | | -| `:Tukey` | ``$(_tex(:cases, "$(_tex(:frac, "1", "3"))-$(_tex(:frac, "1", "3"))(1-x)^3, & x ≤ 1,", "$(_tex(:frac, "1", "3")), & x > 1."))`` | ``$(_tex(:cases, "(1-x)^2, & x ≤ 1,", "0, & x > 1."))`` | ``$(_tex(:cases, "x-2, & x ≤ 1,", "0, & x > 1."))`` | | - -Note that in the 
implementation the second derivative follows the general scheme of hessians -and actually implements s''(x)[X] = s''(x)X``. -""" -smoothing_factory(s) - -smoothing_factory() = smoothing_factory(:Identity) -smoothing_factory(o::ManifoldHessianObjective) = o -smoothing_factory(o::VectorHessianFunction) = o -function smoothing_factory(s::Symbol) - return ManifoldHessianObjective(_smoothing_factory(Val(s))...) -end -function _smoothing_factory(s::Symbol) - return _smoothing_factory(Val(s)) -end -function smoothing_factory((s, α)::Tuple{Symbol,<:Real}) - s, s_p, s_pp = _smoothing_factory(s, α) - return ManifoldHessianObjective(s, s_p, s_pp) -end -function smoothing_factory((o, α)::Tuple{ManifoldHessianObjective,<:Real}) - s, s_p, s_pp = _smoothing_factory(o, α) - return ManifoldHessianObjective(s, s_p, s_pp) -end -function smoothing_factory((s, k)::Tuple{Symbol,<:Int}) - s, s_p, s_pp = _smoothing_factory(s, k) - return VectorHessianFunction( - s, - s_p, - s_pp, - k; - function_type=ComponentVectorialType(), - jacobian_type=ComponentVectorialType(), - hessian_type=ComponentVectorialType(), - ) -end -function smoothing_factory((o, k)::Tuple{ManifoldHessianObjective,<:Int}) - s, s_p, s_pp = _smoothing_factory(o, k) - return VectorHessianFunction( - s, - s_p, - s_pp, - k; - function_type=ComponentVectorialType(), - jacobian_type=ComponentVectorialType(), - hessian_type=ComponentVectorialType(), - ) -end -function smoothing_factory(S...) - s = Function[] - s_p = Function[] - s_pp = Function[] - # collect all functions including their copies into a large vector - for t in S - _s, _s_p, _s_pp = t isa Tuple ? _smoothing_factory(t...) : _smoothing_factory(t) - if _s isa Array - push!(s, _s...) - push!(s_p, _s_p...) - push!(s_pp, _s_pp...) 
- else - push!(s, _s) - push!(s_p, _s_p) - push!(s_pp, _s_pp) - end - end - k = length(s) - return VectorHessianFunction( - s, - s_p, - s_pp, - k; - function_type=ComponentVectorialType(), - jacobian_type=ComponentVectorialType(), - hessian_type=ComponentVectorialType(), - ) -end -# Inner functions that split any smoothing function into its ρ, ρ' and ρ'' parts -function _smoothing_factory(o::ManifoldHessianObjective) - return (E, x) -> get_cost(E, o, x), - (E, x) -> get_gradient(E, o, x), - (E, x, X) -> get_hessian(E, o, x, X) -end -function _smoothing_factory(o::VectorHessianFunction) - return o.value!!, o.jacobian!!, o.hessians!! -end - -function _smoothing_factory(o::ManifoldHessianObjective, α::Real) - return (E, x) -> α^2 * get_cost(E, o, x / α^2), - (E, x) -> get_gradient(E, o, x / α^2), - (E, x, X) -> get_hessian(E, o, x / α^2, X) / α^2 -end -function _smoothing_factory(s::Symbol, α::Real) - s, s_p, s_pp = _smoothing_factory(Val(s)) - return (E, x) -> α^2 * s(E, x / α^2), - (E, x) -> s_p(E, x / α^2), - (E, x, X) -> s_pp(E, x / α^2, X) / α^2 -end -function _smoothing_factory(o::ManifoldHessianObjective, k::Int) - return fill((E, x) -> get_cost(E, o, x), k), - fill((E, x) -> get_gradient(E, o, x), k), - fill((E, x, X) -> get_hessian(E, o, x, X), k) -end -function _smoothing_factory(s::Symbol, k::Int) - s, s_p, s_pp = _smoothing_factory(Val(s)) - return fill(s, k), fill(s_p, k), fill(s_pp, k) -end -# Library -function _smoothing_factory(::Val{:Arctan}) - return (E, x) -> arctan(x), - (E, x) -> x <= 1 / (x^2 + 1), - (E, x, X) -> 2 * x / (x^2 + 1)^2 -end -function _smoothing_factory(::Val{:Cauchy}) - return (E, x) -> log(1 + x), (E, x) -> x <= 1 / (1 + x), (E, x, X) -> -1 / (1 + x)^2 -end -function _smoothing_factory(::Val{:Huber}) - return (E, x) -> x <= 1 ? x : 2 * sqrt(x) - 1, - (E, x) -> x <= 1 ? 1 : 1 / sqrt(x), - (E, x, X) -> (x <= 1 ? 
0 : -1 / (2x^(3 / 2))) * X -end -# Default -function _smoothing_factory(::Val{:Identity}) - return (E, x) -> x, (E, x) -> one(x), (E, x, X) -> zero(X) -end -function _smoothing_factory(::Val{:SoftL1}) - return (E, x) -> 2 * sqrt(1 + x) - 1, - (E, x) -> x <= 1 / sqrt(1 + x), - (E, x, X) -> -1 / (2 * (1 + x)^(3 / 2)) -end -function _smoothing_factory(::Val{:Tukey}) - return (E, x) -> x <= 1 ? 1 / 3 * (1 - (1 - x)^3) : 1 / 3, - (E, x) -> x <= 1 ? (1 - s)^2 : 0, - (E, x, X) -> (x <= 1 ? x - 2 : 0) * X -end - function show(io::IO, lms::LevenbergMarquardtState) i = get_count(lms, :Iterations) Iter = (i > 0) ? "After $i iterations\n" : "" diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index c3b6b44af5..23d48860fa 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -11,7 +11,7 @@ to solve $(_problem(:NonLinearLeastSquares)) -The second signature performs the optimization in-place of `p`. +The second block of signatures perform the optimization in-place of `p`. # Input @@ -35,8 +35,7 @@ $(_var(:Argument, :p)) for mutating evaluation this value must be explicitly specified. You can also provide the cost and its Jacobian already as a[`VectorGradientFunction`](@ref) `vgf`, -Alternatively, passing a [`NonlinearLeastSquaresObjective`](@ref) `nlso`, -then both the keyword `jacobian_tangent_basis` and the `smoothing` are ignored. +Alternatively, passing a [`NonlinearLeastSquaresObjective`](@ref) `nlso`. # Keyword arguments @@ -53,9 +52,6 @@ $(_var(:Keyword, :evaluation)) By default this is a vector of length `num_components` of similar type as `p`. * `jacobian_type=`[`FunctionVectorialType`](@ref)`: an [`AbstractVectorialType`](@ref) specifying the type of Jacobian provided. 
$(_var(:Keyword, :retraction_method)) -* `smoothing=:Identity`: specify a smoothing function ``s`` for all ``s_i=s`` as an [`ManifoldHessianObjective`](@ref) or a vector of smoothing functions ``(s_1,…s_n)`` together as a [`VectorHessianFunction`](@ref). - You can also generate them using certain symbols and parameters. For all available options see the [`smoothing_factory`](@ref). -$(_var(:Keyword, :stopping_criterion; default="[`StopAfterIteration`](@ref)`(200)`$(_sc(:Any))[`StopWhenGradientNormLess`](@ref)`(1e-12)`")) $(_note(:OtherKeywords)) @@ -113,10 +109,9 @@ function LevenbergMarquardt( vgf::VectorGradientFunction, p; evaluation::AbstractEvaluationType=AllocatingEvaluation(), - smoothing=:Identity, kwargs..., ) - nlso = NonlinearLeastSquaresObjective(vgf; smoothing=smoothing) + nlso = NonlinearLeastSquaresObjective(vgf) return LevenbergMarquardt(M, nlso, p; evaluation=evaluation, kwargs...) end function LevenbergMarquardt( @@ -180,7 +175,6 @@ function LevenbergMarquardt!( ), kwargs..., #collect rest ) where {O<:Union{NonlinearLeastSquaresObjective,AbstractDecoratedManifoldObjective}} - i_nlso = get_objective(nlso) # un-decorate for safety dnlso = decorate_objective!(M, nlso; kwargs...) 
nlsp = DefaultManoptProblem(M, dnlso) lms = LevenbergMarquardtState( diff --git a/test/plans/test_nonlinear_least_squares_plan.jl b/test/plans/test_nonlinear_least_squares_plan.jl index 2ba260c29a..c54fc07c57 100644 --- a/test/plans/test_nonlinear_least_squares_plan.jl +++ b/test/plans/test_nonlinear_least_squares_plan.jl @@ -1,7 +1,7 @@ using Manifolds, Manopt, Test @testset "Nonlinear lest squares plan" begin - @testset "Test cost/residual/jacobian cases with smoothing" begin + @testset "Test cost/residual/jacobian cases" begin # a simple nlso objetive on R2 M = Euclidean(2) d1 = [1, 0] @@ -22,103 +22,46 @@ using Manifolds, Manopt, Test J(M, x) = cat(j1(M, x), j2(M, x); dims=2) J!(M, J, x) = (J .= cat(j1(M, x), j2(M, x); dims=2)) # Smoothing types - s1 = Manopt.smoothing_factory(:Identity) - s2 = Manopt.smoothing_factory((:Identity, 2)) # Test all (new) possible combinations of vectorial cost and Jacobian # (1) [F]unction (Gradient), [C]omponent (Gradients), [J] Coordinate (Jacobian in Basis) # (2) [a]llocating [i] inplace - # (3) [s] single smoothing [v] vector smoothing - nlsoFas = NonlinearLeastSquaresObjective( - f, JF, 2; jacobian_type=FunctionVectorialType(), smoothing=s1 + nlsoFa = NonlinearLeastSquaresObjective( + f, JF, 2; jacobian_type=FunctionVectorialType() ) - nlsoFav = NonlinearLeastSquaresObjective( - f, JF, 2; jacobian_type=FunctionVectorialType(), smoothing=s2 - ) - nlsoFis = NonlinearLeastSquaresObjective( - f!, - JF!, - 2; - evaluation=InplaceEvaluation(), - jacobian_type=FunctionVectorialType(), - smoothing=s1, - ) - nlsoFiv = NonlinearLeastSquaresObjective( + nlsoFi = NonlinearLeastSquaresObjective( f!, JF!, 2; evaluation=InplaceEvaluation(), jacobian_type=FunctionVectorialType(), - smoothing=s2, - ) - - nlsoCas = NonlinearLeastSquaresObjective( - [f1, f2], - [j1, j2], - 2; - function_type=ComponentVectorialType(), - jacobian_type=ComponentVectorialType(), - smoothing=s1, ) - nlsoCav = NonlinearLeastSquaresObjective( + nlsoCa = 
NonlinearLeastSquaresObjective( [f1, f2], [j1, j2], 2; function_type=ComponentVectorialType(), jacobian_type=ComponentVectorialType(), - smoothing=s2, ) - nlsoCis = NonlinearLeastSquaresObjective( + nlsoCi = NonlinearLeastSquaresObjective( [f1, f2], [j1!, j2!], 2; function_type=ComponentVectorialType(), jacobian_type=ComponentVectorialType(), evaluation=InplaceEvaluation(), - smoothing=s1, ) - nlsoCiv = NonlinearLeastSquaresObjective( - [f1, f2], - [j1!, j2!], - 2; - function_type=ComponentVectorialType(), - jacobian_type=ComponentVectorialType(), - evaluation=InplaceEvaluation(), - smoothing=s2, - ) - - nlsoJas = NonlinearLeastSquaresObjective( - f, J, 2; jacobian_type=CoordinateVectorialType(), smoothing=s1 - ) - nlsoJav = NonlinearLeastSquaresObjective( - f, J, 2; jacobian_type=CoordinateVectorialType(), smoothing=s2 - ) - nlsoJis = NonlinearLeastSquaresObjective( - f!, J!, 2; evaluation=InplaceEvaluation(), smoothing=s1 - ) - nlsoJiv = NonlinearLeastSquaresObjective( - f!, J!, 2; evaluation=InplaceEvaluation(), smoothing=s2 + nlsoJa = NonlinearLeastSquaresObjective( + f, J, 2; jacobian_type=CoordinateVectorialType() ) + nlsoJi = NonlinearLeastSquaresObjective(f!, J!, 2; evaluation=InplaceEvaluation()) p = [0.5, 0.5] V = [0.0, 0.0] Vt = [1 / sqrt(2), 1 / sqrt(2)] G = zeros(2, 2) Gt = 1 / sqrt(2) .* [-1.0 1.0; 1.0 -1.0] - for nlso in [ - nlsoFas, - nlsoFav, - nlsoFis, - nlsoFiv, - nlsoCas, - nlsoCav, - nlsoCis, - nlsoCiv, - nlsoJas, - nlsoJav, - nlsoJis, - nlsoJiv, - ] + for nlso in [nlsoFa, nlsoFi, nlsoCa, nlsoCi, nlsoJa, nlsoJi] c = get_cost(M, nlso, p) @test c ≈ 0.5 fill!(V, 0.0) @@ -147,32 +90,4 @@ using Manifolds, Manopt, Test # In practice both are the same basis in coordinates, so Jtt stays as iss @test J == Jt end - @testset "Smootthing factory" begin - s1 = Manopt.smoothing_factory() - @test s1 isa ManifoldHessianObjective - s1s = Manopt.smoothing_factory((s1, 2.0)) - @test s1s isa ManifoldHessianObjective - s1v = Manopt.smoothing_factory((s1, 3)) - 
@test s1v isa VectorHessianFunction - @test length(s1v) == 3 - - @test Manopt.smoothing_factory(s1) === s1 # Passthrough for mhos - s2 = Manopt.smoothing_factory((:Identity, 2)) - @test s2 isa VectorHessianFunction - @test length(s2) == 2 - @test Manopt.smoothing_factory(s2) === s2 # Passthrough for vhfs - - s3 = Manopt.smoothing_factory((:Identity, 3.0)) - @test s3 isa ManifoldHessianObjective - - for s in [:Arctan, :Cauchy, :Huber, :SoftL1, :Tukey] - s4 = Manopt.smoothing_factory(s) - @test s4 isa ManifoldHessianObjective - end - - # Combine all different types - s5 = Manopt.smoothing_factory((:Identity, 2), (:Huber, 3), s1, :Tukey, s2) - @test s5 isa VectorHessianFunction - @test length(s5) == 9 - end end From 6d4990d337a18024e062c5f742246e5113cf5189 Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Fri, 3 Jan 2025 09:34:51 +0100 Subject: [PATCH 28/34] Remove duplicate transp entry. --- src/documentation_glossary.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 1e030a4c94..24f947a6ed 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -95,7 +95,6 @@ define!(:LaTeX, :rm, (letter) -> raw"\mathrm{" * "$letter" * "}") define!(:LaTeX, :sqrt, (s) -> raw"\sqrt{" * "$s}") define!(:LaTeX, :subgrad, raw"∂") define!(:LaTeX, :sum, raw"\sum") -define!(:LaTeX, :transp, (letter) -> raw"\text{" * "$letter" * "}") define!(:LaTeX, :text, (letter) -> raw"\text{" * "$letter" * "}") define!(:LaTeX, :transp, raw"\mathrm{T}") define!(:LaTeX, :vdots, raw"\vdots") From bf40b15582f92c0ce8d2a37394b9fd08333b4e38 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Fri, 3 Jan 2025 09:55:51 +0100 Subject: [PATCH 29/34] minor improvements --- .JuliaFormatter.toml | 2 +- Changelog.md | 2 +- src/plans/nonlinear_least_squares_plan.jl | 24 ++++++++++++++--------- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/.JuliaFormatter.toml b/.JuliaFormatter.toml index 042b468347..acd76178c8 
100644 --- a/.JuliaFormatter.toml +++ b/.JuliaFormatter.toml @@ -1,2 +1,2 @@ style = "blue" -ignore = ["tutorials"] \ No newline at end of file +ignore = ["tutorials", ".git"] \ No newline at end of file diff --git a/Changelog.md b/Changelog.md index 73191c629a..0bbe45d1c3 100644 --- a/Changelog.md +++ b/Changelog.md @@ -10,7 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added * the Levenberg-Marquardt algorithm internally uses a `VectorGradientFunction`, which allows - to use a vector of gradients of a function returning all gradients as wel for the algorithm + to use a vector of gradients of a function returning all gradients as well for the algorithm * The `VectorGradientFunction` now also have a `get_jacobian` function ### Changed diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index 5f94e2fb08..f78cd42928 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -10,30 +10,36 @@ Specify a nonlinear least squares problem # Fields -* `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of cost functions ``f_i`` as well as their gradients ``$(_tex(:grad)) f_i``` +* `objective`: a [`AbstractVectorGradientFunction`](@ref)`{E}` containing both the vector of + cost functions ``f_i`` (or a function returning a vector of costs) as well as their + gradients ``$(_tex(:grad)) f_i`` (or Jacobian of the vector-valued function). This `NonlinearLeastSquaresObjective` then has the same [`AbstractEvaluationType`](@ref) `T` as the (inner) `objective`. # Constructors - NonlinearLeastSquaresObjective(f, jacobian, range_dimension; kwargs...) + NonlinearLeastSquaresObjective(f, jacobian, range_dimension::Integer; kwargs...) 
 NonlinearLeastSquaresObjective(vf::AbstractVectorGradientFunction) # Arguments * `f` the vectorial cost function ``f: $(_math(:M)) → ℝ^m`` * `jacobian` the Jacobian, might also be a vector of gradients of the component functions of `f` -* `range_dimension` the number of dimensions `m` the function `f` maps into +* `range_dimension::Integer` the number of dimensions `m` the function `f` maps into These three can also be passed as a [`AbstractVectorGradientFunction`](@ref) `vf` already. # Keyword arguments $(_var(:Keyword, :evaluation)) -* `function_type::`[`AbstractVectorialType`](@ref)`=`[`FunctionVectorialType`](@ref)`()`: format the -* `jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis()`; shortcut to specify the basis the Jacobian matrix is build with. -* `jacobian_type::`[`AbstractVectorialType`](@ref)`=`[`CoordinateVectorialType`](@ref)`(jacobian_tangent_basis)`: specify the format the Jacobian is given in. By default a matrix of the differential with respect to a certain basis of the tangent space. +* `function_type::`[`AbstractVectorialType`](@ref)`=`[`FunctionVectorialType`](@ref)`()`: specify + the format the residuals are given in. By default a function returning a vector. +* `jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis()`: shortcut to specify + the basis the Jacobian matrix is built with. +* `jacobian_type::`[`AbstractVectorialType`](@ref)`=`[`CoordinateVectorialType`](@ref)`(jacobian_tangent_basis)`: + specify the format the Jacobian is given in. By default a matrix of the differential with + respect to a certain basis of the tangent space. 
# See also @@ -48,7 +54,7 @@ end function NonlinearLeastSquaresObjective( f, jacobian, - range_dimension; + range_dimension::Integer; evaluation::AbstractEvaluationType=AllocatingEvaluation(), jacobian_tangent_basis::AbstractBasis=DefaultOrthonormalBasis(), jacobian_type::AbstractVectorialType=CoordinateVectorialType(jacobian_tangent_basis), @@ -79,7 +85,7 @@ function get_cost( for i in 1:length(nlso.objective) v += abs(get_value(M, nlso.objective, p, i))^2 end - v *= 0.5 + v /= 2 return v end function get_cost( @@ -90,7 +96,7 @@ function get_cost( p; value_cache=get_value(M, nlso.objective, p), ) where {E<:AbstractEvaluationType} - return 0.5 * sum(abs(value_cache[i])^2 for i in 1:length(value_cache)) + return sum(abs2, value_cache) / 2 end function get_jacobian( From 666572804459719df759ab2602975ea92b9ad263 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Fri, 3 Jan 2025 10:05:41 +0100 Subject: [PATCH 30/34] minor docs improvements --- src/plans/vectorial_plan.jl | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index 839a83a570..cf0bdcaac6 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -2,7 +2,7 @@ AbstractVectorialType An abstract type for different representations of a vectorial function - ``f: $(_math(:M)) → ℝ`` and its (component-wise) gradient/Jacobian +``f: $(_math(:M)) → ℝ^m`` and its (component-wise) gradient/Jacobian """ abstract type AbstractVectorialType end @@ -10,8 +10,8 @@ abstract type AbstractVectorialType end CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType A type to indicate that gradient of the constraints is implemented as a -Jacobian matrix with respect to a certain basis, that is if the vectfor function -are is ``f: \mathcal M → ℝ^m`` with and we have a asis ``\mathcal B`` of ``T_p\mathcal M``, at ``p∈ \mathcal M`` +Jacobian matrix with respect to a certain basis, that is if the vector function 
+is ``f: \mathcal M → ℝ^m`` and we have a basis ``\mathcal B`` of ``T_p\mathcal M``, at ``p∈ \mathcal M`` This can be written as ``J_g(p) = (c_1^{\mathrm{T}},…,c_m^{\mathrm{T}})^{\mathrm{T}} \in ℝ^{m,d}``, that is, every row ``c_i`` of this matrix is a set of coefficients such that `get_coefficients(M, p, c, B)` is the tangent vector ``\oepratorname{grad} g_i(p)`` for example ``g_i(p) ∈ ℝ^m`` or ``\operatorname{grad} g_i(p) ∈ T_p\mathcal # Fields -* `basis` an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) to indicate the default representation. +* `basis` an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) to indicate the basis + in which the Jacobian is expressed. # Constructor + CoordinateVectorialType(basis=DefaultOrthonormalBasis()) """ struct CoordinateVectorialType{B<:AbstractBasis} <: AbstractVectorialType @@ -37,7 +39,8 @@ Return a basis that fits a vector function representation. For the case, where some vectorial data is stored with respect to a basis, this function returns the corresponding basis, most prominently for the [`CoordinateVectorialType`](@ref). -If a type is not with respect to a certain basis, the [`DefaultOrthonormalBasis`](@extref `ManifoldsBase.DefaultOrthonormalBasis`) is returned +If a type is not with respect to a certain basis, the [`DefaultOrthonormalBasis`](@extref `ManifoldsBase.DefaultOrthonormalBasis`) +is returned. 
""" get_basis(::AbstractVectorialType) = DefaultOrthonormalBasis() get_basis(cvt::CoordinateVectorialType) = cvt.basis @@ -162,7 +165,7 @@ And advantage here is, that again the single components can be evaluated individ # Fields * `value!!::F`: the cost function ``f``, which can take different formats -* `cost_type::`[`AbstractVectorialType`](@ref): indicating / string data for the type of `f` +* `cost_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of `f` * `jacobian!!::G`: the Jacobian of ``f`` * `jacobian_type::`[`AbstractVectorialType`](@ref): indicating / storing data for the type of ``J_f`` * `parameters`: the number `n` from, the size of the vector ``f`` returns. @@ -224,7 +227,7 @@ either as a vector of gradients of a Jacobian, and the Hessian, as a vector of Hessians of the component functions. Both the Jacobian and the Hessian can map into either a sequence of tangent spaces -or a single tangent space of the power manifold of lenth `n`. +or a single tangent space of the power manifold of length `n`. # Fields @@ -1117,7 +1120,7 @@ The `i` can be a linear index, you can provide * a `AbstractVector{<:Integer}` to specify indices * `:` to return the vector of all gradients, which is also the default -This function can perform the evalutation inplace of `V`. +This function can perform the evaluation inplace of `V`. 
""" get_value(M::AbstractManifold, vgf::AbstractVectorFunction, p, i) function get_value( From 4f6838677a4d7e9126877b6e6050a9e562dbd7a2 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Fri, 3 Jan 2025 10:11:28 +0100 Subject: [PATCH 31/34] one last fix --- src/plans/vectorial_plan.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plans/vectorial_plan.jl b/src/plans/vectorial_plan.jl index cf0bdcaac6..04c5a713a6 100644 --- a/src/plans/vectorial_plan.jl +++ b/src/plans/vectorial_plan.jl @@ -321,7 +321,7 @@ Since `i` is assumed to be a linear index, you can provide * a `UnitRange` to specify a range to be returned like `1:3` * a `BitVector` specifying a selection * a `AbstractVector{<:Integer}` to specify indices -* `:` to return the vector of all gradients +* `:` to return the vector of all Hessian evaluations """ get_hessian( M::AbstractManifold, From 98ded014b58add6451474fa6adf36f0199c431d9 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Fri, 3 Jan 2025 10:13:51 +0100 Subject: [PATCH 32/34] another fix --- src/solvers/LevenbergMarquardt.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 23d48860fa..0c7164b052 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -26,7 +26,7 @@ $(_var(:Argument, :M; type=true)) * as a single function returning a vector of gradient vectors ``$(_tex(:grad)) f_i(p)`` * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)`` * as a single function returning a (coefficient) matrix with respect to an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) - of the trangent space at `p`. + of the tangent space at `p`. The type is determined by the `jacobian_type=` keyword argument. $(_var(:Argument, :p)) * `num_components`: length of the vector returned by the cost function (`d`). 
@@ -45,12 +45,12 @@ $(_var(:Keyword, :evaluation)) residual (objective) at minimum is equal to 0. * `damping_term_min=0.1`: initial (and also minimal) value of the damping term * `β=5.0`: parameter by which the damping term is multiplied when the current new point is rejected -* `function_type=`[`FunctionVectorialType`](@ref)`: an [`AbstractVectorialType`](@ref) specifying the type of cost function provided. +* `function_type=`[`FunctionVectorialType`](@ref): an [`AbstractVectorialType`](@ref) specifying the type of cost function provided. * `initial_jacobian_f`: the initial Jacobian of the cost function `f`. By default this is a matrix of size `num_components` times the manifold dimension of similar type as `p`. * `initial_residual_values`: the initial residual vector of the cost function `f`. By default this is a vector of length `num_components` of similar type as `p`. -* `jacobian_type=`[`FunctionVectorialType`](@ref)`: an [`AbstractVectorialType`](@ref) specifying the type of Jacobian provided. +* `jacobian_type=`[`FunctionVectorialType`](@ref): an [`AbstractVectorialType`](@ref) specifying the type of Jacobian provided. $(_var(:Keyword, :retraction_method)) $(_note(:OtherKeywords)) From 0e7cd4d289113ab973a824a45d3840ef1486a46f Mon Sep 17 00:00:00 2001 From: Ronny Bergmann Date: Fri, 3 Jan 2025 15:16:59 +0100 Subject: [PATCH 33/34] Resolve the m-n-d confusion. The number of residuals/components in LM is now unified to m. 
--- src/documentation_glossary.jl | 4 ++-- src/plans/nonlinear_least_squares_plan.jl | 2 +- src/solvers/LevenbergMarquardt.jl | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/documentation_glossary.jl b/src/documentation_glossary.jl index 24f947a6ed..4d8dbc5cc0 100644 --- a/src/documentation_glossary.jl +++ b/src/documentation_glossary.jl @@ -267,10 +267,10 @@ define!( :NonLinearLeastSquares, (; M="M", p="p") -> """ ```math -$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) $(_tex(:sum))_{i=1}^n $(_tex(:abs, "f_i($p)"))^2 +$(_tex(:argmin))_{$p ∈ $(_math(:M; M=M))} $(_tex(:frac,1,2)) $(_tex(:sum))_{i=1}^m $(_tex(:abs, "f_i($p)"))^2 ``` -where ``f: $(_math(:M; M=M)) → ℝ^n`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, +where ``f: $(_math(:M; M=M)) → ℝ^m`` is written with component functions ``f_i: $(_math(:M; M=M)) → ℝ``, ``i=1,…,m``, and each component function is continuously differentiable. """, ) diff --git a/src/plans/nonlinear_least_squares_plan.jl b/src/plans/nonlinear_least_squares_plan.jl index f78cd42928..c7bfb7a697 100644 --- a/src/plans/nonlinear_least_squares_plan.jl +++ b/src/plans/nonlinear_least_squares_plan.jl @@ -138,7 +138,7 @@ _doc_get_residuals_nlso = """ get_residuals(M::AbstractManifold, nlso::NonlinearLeastSquaresObjective, p) get_residuals!(M::AbstractManifold, V, nlso::NonlinearLeastSquaresObjective, p) -Compute the vector of residuals ``f_i(p)``, ``i=1,…,n`` given the manifold `M`, +Compute the vector of residuals ``f_i(p)``, ``i=1,…,m`` given the manifold `M`, the [`NonlinearLeastSquaresObjective`](@ref) `nlso` and a current point ``p`` on `M`. """ diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index 0c7164b052..b7f963f38d 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -16,20 +16,20 @@ The second block of signatures perform the optimization in-place of `p`. 
# Input $(_var(:Argument, :M; type=true)) -* `f`: a cost function ``f: $(_math(:M))→ℝ^d``. +* `f`: a cost function ``f: $(_math(:M))→ℝ^m``. The cost function can be provided in two different ways - * as a single function returning a vector ``f(p) ∈ ℝ^d`` + * as a single function returning a vector ``f(p) ∈ ℝ^m`` * as a vector of functions, where each single function returns a scalar ``f_i(p) ∈ ℝ`` The type is determined by the `function_type=` keyword argument. * `jacobian_f`: the Jacobian of ``f``. The Jacobian can be provided in three different ways - * as a single function returning a vector of gradient vectors ``$(_tex(:grad)) f_i(p)`` - * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)`` - * as a single function returning a (coefficient) matrix with respect to an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) - of the tangent space at `p`. + * as a single function returning a vector of gradient vectors ``$(_tex(:bigl))($(_tex(:grad)) f_i(p)$(_tex(:bigr)))_{i=1}^m + * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)``, , ``i=1,…m`` + * as a single function returning a (coefficient) matrix ``J ∈ ℝ^{m×d}``, where ``d`` is the dimension of the manifold. + These coefficients are given with respect to an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) of the tangent space at `p`. The type is determined by the `jacobian_type=` keyword argument. $(_var(:Argument, :p)) -* `num_components`: length of the vector returned by the cost function (`d`). +* `num_components`: length ``m`` of the vector returned by the cost function. By default its value is -1 which means that it is determined automatically by calling `f` one additional time. This is only possible when `evaluation` is [`AllocatingEvaluation`](@ref), for mutating evaluation this value must be explicitly specified. 
From d3c6cdf94a10095e126fce46b2b9c439064fb409 Mon Sep 17 00:00:00 2001 From: Mateusz Baran Date: Fri, 3 Jan 2025 15:42:14 +0100 Subject: [PATCH 34/34] fix formatting --- src/solvers/LevenbergMarquardt.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/LevenbergMarquardt.jl b/src/solvers/LevenbergMarquardt.jl index b7f963f38d..e5e26c635e 100644 --- a/src/solvers/LevenbergMarquardt.jl +++ b/src/solvers/LevenbergMarquardt.jl @@ -23,8 +23,8 @@ $(_var(:Argument, :M; type=true)) The type is determined by the `function_type=` keyword argument. * `jacobian_f`: the Jacobian of ``f``. The Jacobian can be provided in three different ways - * as a single function returning a vector of gradient vectors ``$(_tex(:bigl))($(_tex(:grad)) f_i(p)$(_tex(:bigr)))_{i=1}^m - * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)``, , ``i=1,…m`` + * as a single function returning a vector of gradient vectors ``$(_tex(:bigl))($(_tex(:grad)) f_i(p)$(_tex(:bigr)))_{i=1}^m`` + * as a vector of functions, where each single function returns a gradient vector ``$(_tex(:grad)) f_i(p)``, ``i=1,…,m`` * as a single function returning a (coefficient) matrix ``J ∈ ℝ^{m×d}``, where ``d`` is the dimension of the manifold. These coefficients are given with respect to an [`AbstractBasis`](@extref `ManifoldsBase.AbstractBasis`) of the tangent space at `p`. The type is determined by the `jacobian_type=` keyword argument.