Fixes from benchmark (#341)
* New benchmark

* use HZ line search

* Fix LineSearchesExt bug

* minor updates

* changelog

* for testing

* somewhat fix L-BFGS memory

* fix L-BFGS memory

* minor updates

* fix test, minor benchmark updates

* would this help?

* add debug to ALM

* fix ALM; test on Julia 1.10

* tweaking stopping criteria of ALM

* Rayleigh quotient benchmarking

* remove benchmark

* update changelog

* fix changelog
mateuszbaran authored Jan 1, 2024
1 parent da84054 commit a24c869
Showing 9 changed files with 91 additions and 48 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
julia-version: ["1.6", "1.8", "1.9"]
julia-version: ["1.6", "1.9", "1.10"]
os: [ubuntu-latest, macOS-latest, windows-latest]
steps:
- uses: actions/checkout@v4
14 changes: 14 additions & 0 deletions Changelog.md
@@ -5,6 +5,20 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.4.46] January 1, 2024

### Changed

* An error is thrown when a line search from `LineSearches.jl` reports search failure.
* Changed the default stopping criterion in the ALM algorithm to mitigate an issue occurring when the step size is very small.
* The default memory length in the default ALM subsolver is now capped at the manifold dimension.
* Replaced CI testing on Julia 1.8 with testing on Julia 1.10.

### Fixed

* A bug in the `LineSearches.jl` extension that led to slower convergence.
* A bug in L-BFGS memory storage that caused significantly slower convergence.

## [0.4.45] December 28, 2023

### Added
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "Manopt"
uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
authors = ["Ronny Bergmann <[email protected]>"]
version = "0.4.45"
version = "0.4.46"

[deps]
ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
17 changes: 4 additions & 13 deletions ext/ManoptLineSearchesExt.jl
@@ -42,29 +42,20 @@ function (cs::Manopt.LineSearchesStepsize)(
retract!(M, p_tmp, p, η, α, cs.retraction_method)
get_gradient!(mp, X_tmp, p_tmp)
vector_transport_to!(M, Y_tmp, p, η, p_tmp, cs.vector_transport_method)
return real(inner(M, p_tmp, Y_tmp, Y_tmp))
return real(inner(M, p_tmp, X_tmp, Y_tmp))
end
function ϕdϕ(α)
# TODO: optimize?
retract!(M, p_tmp, p, η, α, cs.retraction_method)
get_gradient!(mp, X_tmp, p_tmp)
vector_transport_to!(M, Y_tmp, p, η, p_tmp, cs.vector_transport_method)
phi = f(M, p_tmp)
dphi = real(inner(M, p_tmp, Y_tmp, Y_tmp))
dphi = real(inner(M, p_tmp, X_tmp, Y_tmp))
return (phi, dphi)
end

try
α, fp = cs.linesearch(ϕ, dϕ, ϕdϕ, α0, fp, dphi_0)
return α
catch ex
if isa(ex, LineSearches.LineSearchException)
# maybe indicate failure?
return zero(dphi_0)
else
rethrow(ex)
end
end
α, fp = cs.linesearch(ϕ, dϕ, ϕdϕ, α0, fp, dphi_0)
return α
end

end
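
A note on the fix above: along a search direction η from p, the line-search derivative is dϕ(α) = ⟨grad f(p_tmp), Y_tmp⟩, the inner product of the gradient at the trial point with the transported search direction; the old code returned ⟨Y_tmp, Y_tmp⟩, merely the squared norm of the transported direction. A minimal sketch of the corrected quantity, using standard Manifolds.jl functions (the toy gradient of f(p) = p[3] is illustrative, not from the commit):

using Manifolds

M = Sphere(2)
p = [1.0, 0.0, 0.0]
η = [0.0, 1.0, 0.0]                          # tangent search direction at p
α = 0.1
p_tmp = retract(M, p, α * η)                 # trial point along the search path
X_tmp = project(M, p_tmp, [0.0, 0.0, 1.0])   # toy gradient of f(p) = p[3] at p_tmp
Y_tmp = vector_transport_to(M, p, η, p_tmp)  # η transported to the trial point
dphi = real(inner(M, p_tmp, X_tmp, Y_tmp))   # dϕ(α) = ⟨grad f, transported η⟩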
2 changes: 1 addition & 1 deletion src/Manopt.jl
@@ -15,7 +15,7 @@ import ManifoldsBase: embed!
using ColorSchemes
using ColorTypes
using Colors
using DataStructures: CircularBuffer, capacity, length, push!, size
using DataStructures: CircularBuffer, capacity, length, push!, size, isfull
using Dates: Millisecond, Nanosecond, Period, canonicalize, value
using LinearAlgebra:
Diagonal, I, eigen, eigvals, tril, Symmetric, dot, cholesky, eigmin, opnorm
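
The newly imported `isfull` supports the L-BFGS memory fix in src/solvers/quasi_Newton.jl below: it lets `update_hessian!` distinguish a buffer with free slots from one that must recycle its oldest entry. A quick sketch of the relevant `CircularBuffer` semantics (independent of Manopt):

using DataStructures: CircularBuffer, capacity, isfull

buf = CircularBuffer{Int}(3)
push!(buf, 1); push!(buf, 2)
isfull(buf)        # false: 2 of 3 slots used
push!(buf, 3)
isfull(buf)        # true: length(buf) == capacity(buf)
push!(buf, 4)      # evicts the oldest entry
collect(buf)       # [2, 3, 4]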
20 changes: 13 additions & 7 deletions src/plans/quasi_newton_plan.jl
@@ -457,8 +457,8 @@ When updating there are two cases: if there is still free memory, i.e. ``k < m``
update::AbstractQuasiNewtonUpdateRule,
memory_size;
initial_vector=zero_vector(M,x),
scale=1.0
project=true
scale::Real=1.0
project::Bool=true
)
# See also
@@ -489,8 +489,8 @@ function QuasiNewtonLimitedMemoryDirectionUpdate(
::NT,
memory_size::Int;
initial_vector::T=zero_vector(M, p),
scale=1.0,
project=true,
scale::Real=1.0,
project::Bool=true,
vector_transport_method::VTM=default_vector_transport_method(M, typeof(p)),
) where {NT<:AbstractQuasiNewtonUpdateRule,T,VTM<:AbstractVectorTransportMethod}
mT = allocate_result_type(
@@ -531,6 +531,7 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st)
r .*= -1
return r
end
# backward pass
for i in m:-1:1
# what if we divide by zero here? Setting to zero ignores this in the next step
# precompute in case inner is expensive
@@ -558,12 +559,17 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st)
if (last_safe_index == -1)
d.message = "$(d.message)$(length(d.message)>0 ? :"\n" : "")"
d.message = "$(d.message) All memory yield zero inner products, falling back to a gradient step."
return -get_gradient(st)

r .*= -1
return r
end
r .*= 1 / (d.ρ[last_safe_index] * norm(M, p, d.memory_y[last_safe_index])^2)
# initial scaling guess
r ./= d.ρ[last_safe_index] * norm(M, p, d.memory_y[last_safe_index])^2
# forward pass
for i in eachindex(d.ρ)
if abs(d.ρ[i]) > 0
r .+= (d.ξ[i] - d.ρ[i] * inner(M, p, d.memory_y[i], r)) .* d.memory_s[i]
coeff = d.ξ[i] - d.ρ[i] * inner(M, p, d.memory_y[i], r)
r .+= coeff .* d.memory_s[i]
end
end
d.project && embed_project!(M, r, p, r)
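
The backward pass, initial scaling guess, and forward pass above are the Riemannian counterpart of the classical two-loop recursion for applying the L-BFGS inverse-Hessian approximation. For orientation, a minimal Euclidean sketch (plain Julia, not the Manopt API), assuming every stored curvature pair satisfies ⟨s, y⟩ > 0:

using LinearAlgebra: dot

# Returns the search direction -H_k * grad built from the m stored (s, y) pairs.
function lbfgs_direction(grad, memory_s, memory_y)
    m = length(memory_s)
    m == 0 && return -grad                 # empty memory: plain gradient step
    ρ = [1 / dot(memory_y[i], memory_s[i]) for i in 1:m]
    ξ = zeros(m)
    r = copy(grad)
    for i in m:-1:1                        # backward pass
        ξ[i] = ρ[i] * dot(memory_s[i], r)
        r .-= ξ[i] .* memory_y[i]
    end
    # initial scaling guess γ = ⟨s_m, y_m⟩ / ‖y_m‖² = 1 / (ρ_m ‖y_m‖²), as above
    r ./= ρ[m] * dot(memory_y[m], memory_y[m])
    for i in 1:m                           # forward pass
        r .+= (ξ[i] - ρ[i] * dot(memory_y[i], r)) .* memory_s[i]
    end
    return -r
end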
50 changes: 32 additions & 18 deletions src/solvers/augmented_Lagrangian_method.jl
@@ -63,6 +63,7 @@ mutable struct AugmentedLagrangianMethodState{
θ_ϵ::R
penalty::R
stop::TStopping
last_stepsize::R
function AugmentedLagrangianMethodState(
M::AbstractManifold,
co::ConstrainedManifoldObjective,
@@ -81,9 +82,12 @@
θ_ρ::R=0.3,
ϵ_exponent=1 / 100,
θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent),
stopping_criterion::SC=StopAfterIteration(300) | (
StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10)
),
stopping_criterion::SC=StopAfterIteration(300) |
(
StopWhenSmallerOrEqual(:ϵ, ϵ_min) &
StopWhenChangeLess(1e-10)
) |
StopWhenChangeLess(1e-10),
kwargs...,
) where {
P,
@@ -110,6 +114,7 @@
alms.θ_ϵ = θ_ϵ
alms.penalty = Inf
alms.stop = stopping_criterion
alms.last_stepsize = Inf
return alms
end
end
@@ -241,7 +246,7 @@ function augmented_Lagrangian_method(
f::TF,
grad_f::TGF,
p=rand(M);
evaluation=AllocatingEvaluation(),
evaluation::AbstractEvaluationType=AllocatingEvaluation(),
g=nothing,
h=nothing,
grad_g=nothing,
@@ -265,7 +270,7 @@
f::TF,
grad_f::TGF,
p::Number;
evaluation=AllocatingEvaluation(),
evaluation::AbstractEvaluationType=AllocatingEvaluation(),
g=nothing,
grad_g=nothing,
grad_h=nothing,
@@ -287,7 +292,7 @@
end

@doc raw"""
augmented_Lagrangian_method!(M, f, grad_f p=rand(M); kwargs...)
augmented_Lagrangian_method!(M, f, grad_f, p=rand(M); kwargs...)
perform the augmented Lagrangian method (ALM) in-place of `p`.
@@ -298,7 +303,7 @@ function augmented_Lagrangian_method!(
f::TF,
grad_f::TGF,
p;
evaluation=AllocatingEvaluation(),
evaluation::AbstractEvaluationType=AllocatingEvaluation(),
g=nothing,
h=nothing,
grad_g=nothing,
@@ -315,11 +320,11 @@ function augmented_Lagrangian_method!(
M::AbstractManifold,
cmo::O,
p;
evaluation=AllocatingEvaluation(),
evaluation::AbstractEvaluationType=AllocatingEvaluation(),
ϵ::Real=1e-3,
ϵ_min::Real=1e-6,
ϵ_exponent=1 / 100,
θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent),
ϵ_exponent::Real=1 / 100,
θ_ϵ::Real=(ϵ_min / ϵ)^(ϵ_exponent),
μ::Vector=ones(length(get_inequality_constraints(M, cmo, p))),
μ_max::Real=20.0,
λ::Vector=ones(length(get_equality_constraints(M, cmo, p))),
@@ -332,16 +337,16 @@
sub_cost=AugmentedLagrangianCost(cmo, ρ, μ, λ),
sub_grad=AugmentedLagrangianGrad(cmo, ρ, μ, λ),
sub_kwargs=(;),
sub_stopping_criterion=StopAfterIteration(300) |
StopWhenGradientNormLess(ϵ) |
StopWhenStepsizeLess(1e-8),
sub_stopping_criterion::StoppingCriterion=StopAfterIteration(300) |
StopWhenGradientNormLess(ϵ) |
StopWhenStepsizeLess(1e-8),
sub_state::AbstractManoptSolverState=decorate_state!(
QuasiNewtonState(
M,
copy(p);
initial_vector=zero_vector(M, p),
direction_update=QuasiNewtonLimitedMemoryDirectionUpdate(
M, copy(M, p), InverseBFGS(), 30
M, copy(M, p), InverseBFGS(), min(manifold_dimension(M), 30)
),
stopping_criterion=sub_stopping_criterion,
stepsize=default_stepsize(M, QuasiNewtonState),
@@ -359,9 +364,12 @@
sub_kwargs...,
),
),
stopping_criterion::StoppingCriterion=StopAfterIteration(300) | (
StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10)
),
stopping_criterion::StoppingCriterion=StopAfterIteration(300) |
(
StopWhenSmallerOrEqual(:ϵ, ϵ_min) &
StopWhenChangeLess(1e-10)
) |
StopWhenStepsizeLess(1e-10),
kwargs...,
) where {O<:Union{ConstrainedManifoldObjective,AbstractDecoratedManifoldObjective}}
alms = AugmentedLagrangianMethodState(
@@ -410,7 +418,9 @@ function step_solver!(mp::AbstractManoptProblem, alms::AugmentedLagrangianMethod

update_stopping_criterion!(alms, :MinIterateChange, alms.ϵ)

copyto!(M, alms.p, get_solver_result(solve!(alms.sub_problem, alms.sub_state)))
new_p = get_solver_result(solve!(alms.sub_problem, alms.sub_state))
alms.last_stepsize = distance(M, alms.p, new_p, default_inverse_retraction_method(M))
copyto!(M, alms.p, new_p)

# update multipliers
cost_ineq = get_inequality_constraints(mp, alms.p)
@@ -440,3 +450,7 @@ function step_solver!(mp::AbstractManoptProblem, alms::AugmentedLagrangianMethod
return alms
end
get_solver_result(alms::AugmentedLagrangianMethodState) = alms.p

function get_last_stepsize(p::AbstractManoptProblem, s::AugmentedLagrangianMethodState, i)
return s.last_stepsize
end
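
For reference, a hedged usage sketch of the solver whose defaults changed here: the new StopWhenStepsizeLess leg of the default stopping criterion reads the last_stepsize field recorded above, and the default quasi-Newton subsolver memory is now capped at the manifold dimension. Cost, constraint, and start point are illustrative, not from this commit:

using Manopt, Manifolds

M = Sphere(2)
f(M, p) = p[3]                                   # minimize the third coordinate
grad_f(M, p) = project(M, p, [0.0, 0.0, 1.0])    # Riemannian gradient of f
g(M, p) = [-p[1] - 0.5]                          # inequality constraint g(p) ≤ 0
grad_g(M, p) = [project(M, p, [-1.0, 0.0, 0.0])] # its (projected) gradient

q = augmented_Lagrangian_method(M, f, grad_f, [1.0, 0.0, 0.0]; g=g, grad_g=grad_g)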
30 changes: 24 additions & 6 deletions src/solvers/quasi_Newton.jl
@@ -119,7 +119,7 @@ function show(io::IO, qns::QuasiNewtonState)
## Parameters
* direction update: $(status_summary(qns.direction_update))
* retraction method: $(qns.retraction_method)
* vector trnasport method: $(qns.vector_transport_method)
* vector transport method: $(qns.vector_transport_method)
## Stepsize
$(qns.stepsize)
@@ -276,7 +276,7 @@ function quasi_Newton!(
basis::AbstractBasis=DefaultOrthonormalBasis(),
direction_update::AbstractQuasiNewtonUpdateRule=InverseBFGS(),
memory_size::Int=min(manifold_dimension(M), 20),
stabilize=true,
stabilize::Bool=true,
initial_operator::AbstractMatrix=(
if memory_size >= 0
fill(1.0, 0, 0) # don't allocate initial_operator for limited memory operation
@@ -349,8 +349,13 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter)
copyto!(M, qns.p_old, get_iterate(qns))
retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method)
qns.η .*= α
β = locking_condition_scale(
M, qns.direction_update, qns.p_old, qns.η, qns.p, qns.vector_transport_method
# qns.yk update fails if α is equal to 0 because then β is NaN
β = ifelse(
iszero(α),
one(α),
locking_condition_scale(
M, qns.direction_update, qns.p_old, qns.η, qns.p, qns.vector_transport_method
),
)
vector_transport_to!(
M,
@@ -617,8 +622,21 @@ function update_hessian!(
end

# add newest
push!(d.memory_s, st.sk)
push!(d.memory_y, st.yk)
# reuse old memory if buffer is full or allocate a copy if it is not
if isfull(d.memory_s)
old_sk = popfirst!(d.memory_s)
copyto!(M, old_sk, st.sk)
push!(d.memory_s, old_sk)
else
push!(d.memory_s, copy(M, st.sk))
end
if isfull(d.memory_y)
old_yk = popfirst!(d.memory_y)
copyto!(M, old_yk, st.yk)
push!(d.memory_y, old_yk)
else
push!(d.memory_y, copy(M, st.yk))
end
return d
end

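
The memory-storage bug fixed above was an aliasing problem: `push!(d.memory_s, st.sk)` stored a reference to the solver's work vector, so every slot of the circular buffer could end up pointing at the same repeatedly mutated array. A minimal illustration of the pitfall (plain Julia sketch):

using DataStructures: CircularBuffer

sk = [1.0, 2.0]
buf = CircularBuffer{Vector{Float64}}(2)
push!(buf, sk)           # stores a reference, not a copy
sk .= [3.0, 4.0]         # solver mutates its work vector in place
push!(buf, sk)
buf[1] === buf[2]        # true: both slots alias the same mutated array
push!(buf, copy(sk))     # the fix: store an independent copy,
                         # or recycle the evicted slot when isfull(buf)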
2 changes: 1 addition & 1 deletion test/helpers/test_linesearches.jl
@@ -48,7 +48,7 @@ using Test
@test get_last_stepsize(mp, x_opt, x_opt.stepsize, 1) > 0.0

# this tests catching LineSearchException
@test iszero(ls_hz(mp, x_opt, 1, NaN * zero_vector(M, x0)))
@test_throws LineSearchException ls_hz(mp, x_opt, 1, NaN * zero_vector(M, x0))

# test rethrowing errors
function rosenbrock_throw(::AbstractManifold, x)

2 comments on commit a24c869

@mateuszbaran (Member, Author) commented:

@JuliaRegistrator register

Release notes:

Changed

  • An error is thrown when a line search from LineSearches.jl reports search failure.
  • Changed the default stopping criterion in the ALM algorithm to mitigate an issue occurring when the step size is very small.
  • The default memory length in the default ALM subsolver is now capped at the manifold dimension.
  • Replaced CI testing on Julia 1.8 with testing on Julia 1.10.

Fixed

  • A bug in the LineSearches.jl extension that led to slower convergence.
  • A bug in L-BFGS memory storage that caused significantly slower convergence.

@JuliaRegistrator commented:

Registration pull request created: JuliaRegistries/General/98017

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.4.46 -m "<description of version>" a24c869897c732b510c71fcc2d7693f819e925d1
git push origin v0.4.46
