From a24c869897c732b510c71fcc2d7693f819e925d1 Mon Sep 17 00:00:00 2001
From: Mateusz Baran
Date: Mon, 1 Jan 2024 16:50:27 +0100
Subject: [PATCH] Fixes from benchmark (#341)

* New benchmark
* use HZ line search
* Fix LineSearchesExt bug
* minor updates
* changelog
* for testing
* somewhat fix L-BFGS memory
* fix L-BFGS memory
* minor updates
* fix test, minor benchmark updates
* would this help?
* add debug to ALM
* fix ALM; test on Julia 1.10
* tweaking stopping criteria of ALM
* Rayleigh quotient benchmarking
* remove benchmark
* update changelog
* fix changelog
---
 .github/workflows/ci.yml                   |  2 +-
 Changelog.md                               | 14 ++++++
 Project.toml                               |  2 +-
 ext/ManoptLineSearchesExt.jl               | 17 ++------
 src/Manopt.jl                              |  2 +-
 src/plans/quasi_newton_plan.jl             | 20 ++++++---
 src/solvers/augmented_Lagrangian_method.jl | 50 ++++++++++++++--------
 src/solvers/quasi_Newton.jl                | 30 ++++++++++---
 test/helpers/test_linesearches.jl          |  2 +-
 9 files changed, 91 insertions(+), 48 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index acfe1490b2..fd7aa1a14c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        julia-version: ["1.6", "1.8", "1.9"]
+        julia-version: ["1.6", "1.9", "1.10"]
         os: [ubuntu-latest, macOS-latest, windows-latest]
     steps:
       - uses: actions/checkout@v4
diff --git a/Changelog.md b/Changelog.md
index 80e1df308b..d7acc33312 100644
--- a/Changelog.md
+++ b/Changelog.md
@@ -5,6 +5,20 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.4.46] January 1, 2024
+
+### Changed
+
+* An error is now thrown when a line search from `LineSearches.jl` reports a search failure, instead of silently returning a zero step size.
+* Changed the default stopping criterion of the ALM algorithm to mitigate an issue that occurs when the step size becomes very small.
+* The default memory length of the default ALM subsolver is now capped at the manifold dimension.
+* Replaced CI testing on Julia 1.8 with testing on Julia 1.10.
+
+### Fixed
+
+* A bug in the `LineSearches.jl` extension that led to slower convergence.
+* A bug in the L-BFGS memory storage that caused significantly slower convergence.
+
 ## [0.4.45] December 28, 2023
 
 ### Added
diff --git a/Project.toml b/Project.toml
index 857ceae1c2..a69bcab818 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Manopt"
 uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5"
 authors = ["Ronny Bergmann "]
-version = "0.4.45"
+version = "0.4.46"
 
 [deps]
 ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4"
diff --git a/ext/ManoptLineSearchesExt.jl b/ext/ManoptLineSearchesExt.jl
index 9ac2c5ea6e..b3bda53f61 100644
--- a/ext/ManoptLineSearchesExt.jl
+++ b/ext/ManoptLineSearchesExt.jl
@@ -42,7 +42,7 @@ function (cs::Manopt.LineSearchesStepsize)(
         retract!(M, p_tmp, p, η, α, cs.retraction_method)
         get_gradient!(mp, X_tmp, p_tmp)
         vector_transport_to!(M, Y_tmp, p, η, p_tmp, cs.vector_transport_method)
-        return real(inner(M, p_tmp, Y_tmp, Y_tmp))
+        return real(inner(M, p_tmp, X_tmp, Y_tmp))
     end
     function ϕdϕ(α)
         # TODO: optimize?
@@ -50,21 +50,12 @@ function (cs::Manopt.LineSearchesStepsize)( get_gradient!(mp, X_tmp, p_tmp) vector_transport_to!(M, Y_tmp, p, η, p_tmp, cs.vector_transport_method) phi = f(M, p_tmp) - dphi = real(inner(M, p_tmp, Y_tmp, Y_tmp)) + dphi = real(inner(M, p_tmp, X_tmp, Y_tmp)) return (phi, dphi) end - try - α, fp = cs.linesearch(ϕ, dϕ, ϕdϕ, α0, fp, dphi_0) - return α - catch ex - if isa(ex, LineSearches.LineSearchException) - # maybe indicate failure? - return zero(dphi_0) - else - rethrow(ex) - end - end + α, fp = cs.linesearch(ϕ, dϕ, ϕdϕ, α0, fp, dphi_0) + return α end end diff --git a/src/Manopt.jl b/src/Manopt.jl index 5aeacc80dc..18b0a67b5c 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -15,7 +15,7 @@ import ManifoldsBase: embed! using ColorSchemes using ColorTypes using Colors -using DataStructures: CircularBuffer, capacity, length, push!, size +using DataStructures: CircularBuffer, capacity, length, push!, size, isfull using Dates: Millisecond, Nanosecond, Period, canonicalize, value using LinearAlgebra: Diagonal, I, eigen, eigvals, tril, Symmetric, dot, cholesky, eigmin, opnorm diff --git a/src/plans/quasi_newton_plan.jl b/src/plans/quasi_newton_plan.jl index d224ccfe3d..d73b635577 100644 --- a/src/plans/quasi_newton_plan.jl +++ b/src/plans/quasi_newton_plan.jl @@ -457,8 +457,8 @@ When updating there are two cases: if there is still free memory, i.e. ``k < m`` update::AbstractQuasiNewtonUpdateRule, memory_size; initial_vector=zero_vector(M,x), - scale=1.0 - project=true + scale::Real=1.0 + project::Bool=true ) # See also @@ -489,8 +489,8 @@ function QuasiNewtonLimitedMemoryDirectionUpdate( ::NT, memory_size::Int; initial_vector::T=zero_vector(M, p), - scale=1.0, - project=true, + scale::Real=1.0, + project::Bool=true, vector_transport_method::VTM=default_vector_transport_method(M, typeof(p)), ) where {NT<:AbstractQuasiNewtonUpdateRule,T,VTM<:AbstractVectorTransportMethod} mT = allocate_result_type( @@ -531,6 +531,7 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st) r .*= -1 return r end + # backward pass for i in m:-1:1 # what if we divide by zero here? Setting to zero ignores this in the next step # precompute in case inner is expensive @@ -558,12 +559,17 @@ function (d::QuasiNewtonLimitedMemoryDirectionUpdate{InverseBFGS})(r, mp, st) if (last_safe_index == -1) d.message = "$(d.message)$(length(d.message)>0 ? :"\n" : "")" d.message = "$(d.message) All memory yield zero inner products, falling back to a gradient step." 
-        return -get_gradient(st)
+
+        r .*= -1
+        return r
     end
-    r .*= 1 / (d.ρ[last_safe_index] * norm(M, p, d.memory_y[last_safe_index])^2)
+    # initial scaling guess
+    r ./= d.ρ[last_safe_index] * norm(M, p, d.memory_y[last_safe_index])^2
+    # forward pass
     for i in eachindex(d.ρ)
         if abs(d.ρ[i]) > 0
-            r .+= (d.ξ[i] - d.ρ[i] * inner(M, p, d.memory_y[i], r)) .* d.memory_s[i]
+            coeff = d.ξ[i] - d.ρ[i] * inner(M, p, d.memory_y[i], r)
+            r .+= coeff .* d.memory_s[i]
         end
     end
     d.project && embed_project!(M, r, p, r)
diff --git a/src/solvers/augmented_Lagrangian_method.jl b/src/solvers/augmented_Lagrangian_method.jl
index d96da13990..0f1b3a9997 100644
--- a/src/solvers/augmented_Lagrangian_method.jl
+++ b/src/solvers/augmented_Lagrangian_method.jl
@@ -63,6 +63,7 @@ mutable struct AugmentedLagrangianMethodState{
     θ_ϵ::R
     penalty::R
     stop::TStopping
+    last_stepsize::R
     function AugmentedLagrangianMethodState(
         M::AbstractManifold,
         co::ConstrainedManifoldObjective,
@@ -81,9 +82,12 @@ mutable struct AugmentedLagrangianMethodState{
         θ_ρ::R=0.3,
         ϵ_exponent=1 / 100,
         θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent),
-        stopping_criterion::SC=StopAfterIteration(300) | (
-            StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10)
-        ),
+        stopping_criterion::SC=StopAfterIteration(300) |
+                               (
+                                   StopWhenSmallerOrEqual(:ϵ, ϵ_min) &
+                                   StopWhenChangeLess(1e-10)
+                               ) |
+                               StopWhenStepsizeLess(1e-10),
         kwargs...,
     ) where {
         P,
@@ -110,6 +114,7 @@ mutable struct AugmentedLagrangianMethodState{
         alms.θ_ϵ = θ_ϵ
         alms.penalty = Inf
         alms.stop = stopping_criterion
+        alms.last_stepsize = Inf
         return alms
     end
 end
@@ -241,7 +246,7 @@ function augmented_Lagrangian_method(
     f::TF,
     grad_f::TGF,
     p=rand(M);
-    evaluation=AllocatingEvaluation(),
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
     g=nothing,
     h=nothing,
     grad_g=nothing,
@@ -265,7 +270,7 @@ function augmented_Lagrangian_method(
     f::TF,
     grad_f::TGF,
     p::Number;
-    evaluation=AllocatingEvaluation(),
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
     g=nothing,
     grad_g=nothing,
     grad_h=nothing,
@@ -287,7 +292,7 @@ function augmented_Lagrangian_method(
 end
 
 @doc raw"""
-    augmented_Lagrangian_method!(M, f, grad_f p=rand(M); kwargs...)
+    augmented_Lagrangian_method!(M, f, grad_f, p=rand(M); kwargs...)
 
 perform the augmented Lagrangian method (ALM) in-place of `p`.
@@ -298,7 +303,7 @@ function augmented_Lagrangian_method!(
     f::TF,
     grad_f::TGF,
     p;
-    evaluation=AllocatingEvaluation(),
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
     g=nothing,
     h=nothing,
     grad_g=nothing,
@@ -315,11 +320,11 @@ function augmented_Lagrangian_method!(
     M::AbstractManifold,
     cmo::O,
     p;
-    evaluation=AllocatingEvaluation(),
+    evaluation::AbstractEvaluationType=AllocatingEvaluation(),
     ϵ::Real=1e-3,
     ϵ_min::Real=1e-6,
-    ϵ_exponent=1 / 100,
-    θ_ϵ=(ϵ_min / ϵ)^(ϵ_exponent),
+    ϵ_exponent::Real=1 / 100,
+    θ_ϵ::Real=(ϵ_min / ϵ)^(ϵ_exponent),
     μ::Vector=ones(length(get_inequality_constraints(M, cmo, p))),
     μ_max::Real=20.0,
     λ::Vector=ones(length(get_equality_constraints(M, cmo, p))),
@@ -332,16 +337,16 @@ function augmented_Lagrangian_method!(
     sub_cost=AugmentedLagrangianCost(cmo, ρ, μ, λ),
     sub_grad=AugmentedLagrangianGrad(cmo, ρ, μ, λ),
     sub_kwargs=(;),
-    sub_stopping_criterion=StopAfterIteration(300) |
-                           StopWhenGradientNormLess(ϵ) |
-                           StopWhenStepsizeLess(1e-8),
+    sub_stopping_criterion::StoppingCriterion=StopAfterIteration(300) |
+                                              StopWhenGradientNormLess(ϵ) |
+                                              StopWhenStepsizeLess(1e-8),
     sub_state::AbstractManoptSolverState=decorate_state!(
         QuasiNewtonState(
             M,
             copy(p);
             initial_vector=zero_vector(M, p),
             direction_update=QuasiNewtonLimitedMemoryDirectionUpdate(
-                M, copy(M, p), InverseBFGS(), 30
+                M, copy(M, p), InverseBFGS(), min(manifold_dimension(M), 30)
             ),
             stopping_criterion=sub_stopping_criterion,
             stepsize=default_stepsize(M, QuasiNewtonState),
@@ -359,9 +364,12 @@ function augmented_Lagrangian_method!(
             sub_kwargs...,
         ),
     ),
-    stopping_criterion::StoppingCriterion=StopAfterIteration(300) | (
-        StopWhenSmallerOrEqual(:ϵ, ϵ_min) & StopWhenChangeLess(1e-10)
-    ),
+    stopping_criterion::StoppingCriterion=StopAfterIteration(300) |
+                                          (
+                                              StopWhenSmallerOrEqual(:ϵ, ϵ_min) &
+                                              StopWhenChangeLess(1e-10)
+                                          ) |
+                                          StopWhenStepsizeLess(1e-10),
     kwargs...,
 ) where {O<:Union{ConstrainedManifoldObjective,AbstractDecoratedManifoldObjective}}
     alms = AugmentedLagrangianMethodState(
@@ -410,7 +418,9 @@ function step_solver!(mp::AbstractManoptProblem, alms::AugmentedLagrangianMethod
 
     update_stopping_criterion!(alms, :MinIterateChange, alms.ϵ)
 
-    copyto!(M, alms.p, get_solver_result(solve!(alms.sub_problem, alms.sub_state)))
+    new_p = get_solver_result(solve!(alms.sub_problem, alms.sub_state))
+    alms.last_stepsize = distance(M, alms.p, new_p, default_inverse_retraction_method(M))
+    copyto!(M, alms.p, new_p)
 
     # update multipliers
     cost_ineq = get_inequality_constraints(mp, alms.p)
@@ -440,3 +450,7 @@ function step_solver!(mp::AbstractManoptProblem, alms::AugmentedLagrangianMethod
     return alms
 end
 get_solver_result(alms::AugmentedLagrangianMethodState) = alms.p
+
+function get_last_stepsize(p::AbstractManoptProblem, s::AugmentedLagrangianMethodState, i)
+    return s.last_stepsize
+end
diff --git a/src/solvers/quasi_Newton.jl b/src/solvers/quasi_Newton.jl
index d80783b105..c54b9afc4f 100644
--- a/src/solvers/quasi_Newton.jl
+++ b/src/solvers/quasi_Newton.jl
@@ -119,7 +119,7 @@ function show(io::IO, qns::QuasiNewtonState)
     ## Parameters
     * direction update: $(status_summary(qns.direction_update))
     * retraction method: $(qns.retraction_method)
-    * vector trnasport method: $(qns.vector_transport_method)
+    * vector transport method: $(qns.vector_transport_method)
 
     ## Stepsize
     $(qns.stepsize)
@@ -276,7 +276,7 @@ function quasi_Newton!(
     basis::AbstractBasis=DefaultOrthonormalBasis(),
     direction_update::AbstractQuasiNewtonUpdateRule=InverseBFGS(),
     memory_size::Int=min(manifold_dimension(M), 20),
-    stabilize=true,
+    stabilize::Bool=true,
     initial_operator::AbstractMatrix=(
         if memory_size >= 0
             fill(1.0, 0, 0) # don't allocate initial_operator for limited memory operation
@@ -349,8 +349,13 @@ function step_solver!(mp::AbstractManoptProblem, qns::QuasiNewtonState, iter)
     copyto!(M, qns.p_old, get_iterate(qns))
     retract!(M, qns.p, qns.p, qns.η, α, qns.retraction_method)
     qns.η .*= α
-    β = locking_condition_scale(
-        M, qns.direction_update, qns.p_old, qns.η, qns.p, qns.vector_transport_method
+    # qns.yk update fails if α is equal to 0 because then β is NaN
+    β = ifelse(
+        iszero(α),
+        one(α),
+        locking_condition_scale(
+            M, qns.direction_update, qns.p_old, qns.η, qns.p, qns.vector_transport_method
+        ),
     )
     vector_transport_to!(
         M,
@@ -617,8 +622,21 @@ function update_hessian!(
     end
 
     # add newest
-    push!(d.memory_s, st.sk)
-    push!(d.memory_y, st.yk)
+    # reuse old memory if buffer is full or allocate a copy if it is not
+    if isfull(d.memory_s)
+        old_sk = popfirst!(d.memory_s)
+        copyto!(M, old_sk, st.sk)
+        push!(d.memory_s, old_sk)
+    else
+        push!(d.memory_s, copy(M, st.sk))
+    end
+    if isfull(d.memory_y)
+        old_yk = popfirst!(d.memory_y)
+        copyto!(M, old_yk, st.yk)
+        push!(d.memory_y, old_yk)
+    else
+        push!(d.memory_y, copy(M, st.yk))
+    end
 
     return d
 end
diff --git a/test/helpers/test_linesearches.jl b/test/helpers/test_linesearches.jl
index 4e82f6bb83..24dd397024 100644
--- a/test/helpers/test_linesearches.jl
+++ b/test/helpers/test_linesearches.jl
@@ -48,7 +48,7 @@ using Test
     @test get_last_stepsize(mp, x_opt, x_opt.stepsize, 1) > 0.0
 
     # this tests catching LineSearchException
-    @test iszero(ls_hz(mp, x_opt, 1, NaN * zero_vector(M, x0)))
+    @test_throws LineSearchException ls_hz(mp, x_opt, 1, NaN * zero_vector(M, x0))
 
     # test rethrowing errors
     function rosenbrock_throw(::AbstractManifold, x)
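
Note on the L-BFGS memory fix in `update_hessian!` above: a `CircularBuffer` stores references, so the removed `push!(d.memory_s, st.sk)` lines kept inserting the same array into the buffer, which appears to be why the memory slots could end up aliasing one vector that was then mutated on every iteration. A minimal sketch of the pitfall and the remedy (illustrative names `buf`, `v`, `slot`; not part of the patch):

    using DataStructures: CircularBuffer, isfull

    buf = CircularBuffer{Vector{Float64}}(2)
    v = [1.0, 2.0]
    push!(buf, v)        # stores a reference to v, not a copy
    v .= [3.0, 4.0]      # mutating v in place also rewrites buf[1]
    @assert buf[1] == [3.0, 4.0]

    # the remedy used in the patch: push an independent copy while the
    # buffer still has room, and once it is full recycle the storage of
    # the evicted oldest entry instead of allocating
    push!(buf, copy(v))
    if isfull(buf)
        slot = popfirst!(buf)   # oldest entry, safe to overwrite
        slot .= [5.0, 6.0]
        push!(buf, slot)
    end
    @assert length(buf) == 2

After the patch, `update_hessian!` follows exactly this second pattern: it recycles the evicted entry via `popfirst!`/`copyto!` when the buffer `isfull`, and pushes `copy(M, st.sk)` (respectively `copy(M, st.yk)`) otherwise.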