diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e62aa1960f..91db81de98 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,6 +25,7 @@ jobs: - uses: julia-actions/julia-processcoverage@v1 - uses: codecov/codecov-action@v4 with: + token: ${{ secrets.CODECOV_TOKEN }} file: ./lcov.info name: codecov-umbrella fail_ci_if_error: false diff --git a/.vale.ini b/.vale.ini index cd03c71ae7..771fd92f56 100644 --- a/.vale.ini +++ b/.vale.ini @@ -15,6 +15,8 @@ BasedOnStyles = Vale, Google [docs/src/contributing.md] Google.FirstPerson = No +Google.We = No + [src/*.md] ; actually .jl but they are identified those above I think? BasedOnStyles = Vale, Google @@ -23,7 +25,7 @@ BasedOnStyles = Vale, Google Google.Units = false #wto ignore formats= for now. TokenIgnores = \$.+?\$,\[.+?\]\(@(ref|id|cite).+?\),`.+`,``.*``,\s{4}.+\n -[test/plans/test_debug.jl] #repeat previous until I find out how to combine them +[test/plans/test_debug.md] #repeat previous until I find out how to combine them Google.Units = false #wto ignore formats= for now. TokenIgnores = \$.+?\$,\[.+?\]\(@(ref|id|cite).+?\),`.+`,``.*``,\s{4}.+\n diff --git a/.zenodo.json b/.zenodo.json index 004a426411..73ab025f17 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -3,7 +3,8 @@ { "affiliation": "NTNU Trondheim", "name": "Jasa, Hajg", - "type": "Other" + "type": "ProjectMember", + "orcid": "0009-0002-7917-0530" }, { "name": "Mathieu Besançon", diff --git a/Changelog.md b/Changelog.md index 944dc724ea..551ddfbfbc 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,12 +5,18 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.4.54] unreleased +## [0.4.54] February 28, 2024 + +### Added + +* `convex_bundle_method` optimization algorithm for non-smooth geodesically convex functions. +* `proximal_bundle_method` optimization algorithm for non-smooth functions. +* `StopWhenSubgradientNormLess` and `StopWhenLagrangeMultiplierLess` stopping criteria. ### Fixed * Doc strings now follow a [vale.sh](https://vale.sh) policy. Though this is not fully working, - this PR imporves a lot of the doc strings concerning wording and spelling. + this PR improves a lot of the doc strings concerning wording and spelling. 
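For reference, the following sketch (not part of this changeset) shows a minimal call of the two new solvers; it adapts the nonsmooth distance example from the benchmark file removed further below, and assumes `QuadraticModels.jl` and `RipQP.jl` are loaded so that the default quadratic subsolver is available:

```julia
using Manopt, Manifolds, QuadraticModels, RipQP

M = Euclidean(2)
x = [4.0, 2.0]
f(M, q) = distance(M, q, x)  # nonsmooth convex cost with minimizer x
function ∂f(M, q)            # return one element of the subdifferential of f at q
    d = distance(M, x, q)
    d == 0 && return zero_vector(M, q)
    return -log(M, q, x) / max(10 * eps(Float64), d)
end
q1 = convex_bundle_method(M, f, ∂f, [5.0, 2.0])
q2 = proximal_bundle_method(M, f, ∂f, [5.0, 2.0])
```

Both solvers stop by default once `StopWhenLagrangeMultiplierLess(1e-8) | StopAfterIteration(5000)` is fulfilled, see the state constructors further below.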
## [0.4.53] February 13, 2024 diff --git a/Project.toml b/Project.toml index 075fe203fe..d37bfb8626 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.53" +version = "0.4.54" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" @@ -10,6 +10,7 @@ Colors = "5ae59095-9a9b-59fe-a467-6f913c188581" DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +LinearOperators = "5c8ed15e-5a4c-59e4-a42b-c7e8811fb125" ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd" ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" @@ -28,13 +29,16 @@ LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637" LineSearches = "d3d80556-e9d4-5f37-9878-2ab0fcc64255" Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +QuadraticModels = "f468eda6-eac5-11e8-05a5-ff9e497bcd19" +RipQP = "1e40b3f8-35eb-4cd8-8edd-3e515bb9de08" [extensions] -ManoptJuMPExt = ["JuMP"] +ManoptJuMPExt = "JuMP" ManoptLRUCacheExt = "LRUCache" ManoptLineSearchesExt = "LineSearches" -ManoptManifoldsExt = ["Manifolds"] +ManoptManifoldsExt = "Manifolds" ManoptPlotsExt = "Plots" +ManoptRipQPQuadraticModelsExt = ["RipQP", "QuadraticModels"] [compat] ColorSchemes = "3.5.0" @@ -45,6 +49,7 @@ Dates = "1.6" JuMP = "1.15" LRUCache = "1.4" LinearAlgebra = "1.6" +LinearOperators = "~2.6" ManifoldDiff = "0.3.8" Manifolds = "0.9.11" ManifoldsBase = "0.15" @@ -53,8 +58,10 @@ Markdown = "1.6" PolynomialRoots = "1" Preferences = "1.4" Printf = "1.6" +QuadraticModels = "0.9" Random = "1.6" Requires = "0.5, 1" +RipQP = "0.6" SparseArrays = "1.6" Statistics = "1.6" Test = "1.6" @@ -69,7 +76,9 @@ ManifoldDiff = "af67fdf4-a580-4b9f-bbec-742ef357defd" Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e" ManoptExamples = "5b8d5e80-5788-45cb-83d6-5e8f1484217d" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +QuadraticModels = "f468eda6-eac5-11e8-05a5-ff9e497bcd19" +RipQP = "1e40b3f8-35eb-4cd8-8edd-3e515bb9de08" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "ForwardDiff", "JuMP", "Manifolds", "ManoptExamples", "ManifoldDiff", "Plots", "LineSearches", "LRUCache"] +test = ["Test", "ForwardDiff", "JuMP", "Manifolds", "ManoptExamples", "ManifoldDiff", "Plots", "LineSearches", "LRUCache", "RipQP", "QuadraticModels"] diff --git a/benchmarks/benchmark_subgradient.jl b/benchmarks/benchmark_subgradient.jl deleted file mode 100644 index 540505087f..0000000000 --- a/benchmarks/benchmark_subgradient.jl +++ /dev/null @@ -1,31 +0,0 @@ -using Manopt, ManifoldsBase, Manifolds, LinearAlgebra, BenchmarkTools, Test - -M = Euclidean(2) -x = [4.0, 2.0] -x0 = [5.0, 2.0] -f(M, y) = distance(M, y, x) -function ∂f(M, y) - if distance(M, x, y) == 0 - return zero_vector(M, y) - end - return -2 * log(M, y, x) / max(10 * eps(Float64), distance(M, x, y)) -end -x1 = subgradient_method(M, f, ∂f, x0) -@btime subgradient_method($M, $f, $∂f, $x0) - -function ∂f!(M, X, y) - d = distance(M, x, y) - if d == 0 - return zero_vector!(M, X, y) - end - log!(M, X, y, x) - X .*= -2 / max(10 * eps(Float64), d) - return X -end -x2 = copy(x0) -subgradient_method!(M, f, ∂f!, x2; evaluation=InplaceEvaluation()) -@btime subgradient_method!($M, $f, $∂f!, x3; evaluation=$(InplaceEvaluation())) setup = ( - x3 = deepcopy($x0) -) - -@test x1 == x2 diff --git 
a/benchmarks/benchmark_trust_regions.jl b/benchmarks/benchmark_trust_regions.jl deleted file mode 100644 index 495a5794a3..0000000000 --- a/benchmarks/benchmark_trust_regions.jl +++ /dev/null @@ -1,35 +0,0 @@ - -using Manifolds, Manopt, BenchmarkTools, Test - -include("../test/solvers/trust_region_model.jl") - -n = size(A, 1) -p = 2 -N = Grassmann(n, p) -M = PowerManifold(N, ArrayPowerRepresentation(), 2) -x = rand(M) - -x_opt = trust_regions(M, cost, rgrad, rhess, x; max_trust_region_radius=8.0) -@btime x_opt = trust_regions($M, $cost, $rgrad, $rhess, $x; max_trust_region_radius=8.0) - -h = RHess(M, A, p) -g = RGrad(M, A) -x_opt2 = trust_regions( - M, cost, g, h, x; max_trust_region_radius=8.0, evaluation=InplaceEvaluation() -) - -@btime trust_regions( - $M, $cost, $g, $h, x2; max_trust_region_radius=8.0, evaluation=$(InplaceEvaluation()) -) setup = (x2 = deepcopy($x)) - -x3 = deepcopy(x) -trust_regions!( - M, cost, g, h, x3; max_trust_region_radius=8.0, evaluation=InplaceEvaluation() -) - -@btime trust_regions!( - $M, $cost, $g, $h, x3; max_trust_region_radius=8.0, evaluation=$(InplaceEvaluation()) -) setup = (x3 = deepcopy($x)) - -@test distance(M, x_opt, x_opt2) ≈ 0 -@test distance(M, x_opt, x3) ≈ 0 diff --git a/docs/Project.toml b/docs/Project.toml index 424afa482a..b07a85099f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -18,7 +18,9 @@ Manifolds = "1cead3c2-87b3-11e9-0ccd-23c62b72b94e" ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb" Manopt = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +QuadraticModels = "f468eda6-eac5-11e8-05a5-ff9e497bcd19" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +RipQP = "1e40b3f8-35eb-4cd8-8edd-3e515bb9de08" [compat] BenchmarkTools = "1.3" @@ -38,4 +40,5 @@ Literate = "2" Manifolds = "0.8.81, 0.9" ManifoldsBase = "0.14.12, 0.15" Manopt = "0.4" +QuadraticModels = "0.9.6" Plots = "1" diff --git a/docs/make.jl b/docs/make.jl index a03f38e0e5..56ecaee942 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -7,7 +7,7 @@ if "--help" ∈ ARGS """ docs/make.jl -Render the `Manopt.jl` documenation with optinal arguments +Render the `Manopt.jl` documentation with optional arguments Arguments * `--exclude-tutorials` - exclude the tutorials from the menu of Documenter, @@ -18,8 +18,11 @@ Arguments * `--quarto` – run the Quarto notebooks from the `tutorials/` folder before generating the documentation this has to be run locally at least once for the `tutorials/*.md` files to exist that are included in the documentation (see `--exclude-tutorials`) for the alternative. - If they are generated ones they are cached accordingly. + If they are generated once they are cached accordingly. Then you can spare time in the rendering by not passing this argument. + If quarto is not run, some tutorials are generated as empty files, since they + are referenced from within the documentation. These are currently + `Optimize.md` and `ImplementOwnManifold.md`. 
""", ) exit(0) @@ -50,6 +53,9 @@ if "--quarto" ∈ ARGS Pkg.activate(@__DIR__) # but return to the docs one before run(`quarto render $(tutorials_folder)`) end +else # fallback to at least create empty files for Optimize and Implement + touch(joinpath(@__DIR__, "src/tutorials/Optimize.md")) + touch(joinpath(@__DIR__, "src/tutorials/ImplementOwnManifold.md")) end tutorials_in_menu = true @@ -67,6 +73,7 @@ end using Documenter using DocumenterCitations using JuMP, LineSearches, LRUCache, Manopt, Manifolds, Plots +using RipQP, QuadraticModels # (d) add contributing.md to docs generated_path = joinpath(@__DIR__, "src") @@ -142,6 +149,11 @@ makedocs(; else Manopt.ManoptPlotsExt end, + if isdefined(Base, :get_extension) + Base.get_extension(Manopt, :ManoptRipQPQuadraticModelsExt) + else + Manopt.ManoptRipQPQuadraticModelsExt + end, ], authors="Ronny Bergmann and contributors.", sitename="Manopt.jl", @@ -156,6 +168,7 @@ makedocs(; "Augmented Lagrangian Method" => "solvers/augmented_Lagrangian_method.md", "Chambolle-Pock" => "solvers/ChambollePock.md", "Conjugate gradient descent" => "solvers/conjugate_gradient_descent.md", + "Convex bundle method" => "solvers/convex_bundle_method.md", "Cyclic Proximal Point" => "solvers/cyclic_proximal_point.md", "Difference of Convex" => "solvers/difference_of_convex.md", "Douglas—Rachford" => "solvers/DouglasRachford.md", @@ -166,6 +179,7 @@ makedocs(; "Nelder–Mead" => "solvers/NelderMead.md", "Particle Swarm Optimization" => "solvers/particle_swarm.md", "Primal-dual Riemannian semismooth Newton" => "solvers/primal_dual_semismooth_Newton.md", + "Proximal bundle method" => "solvers/proximal_bundle_method.md", "Quasi-Newton" => "solvers/quasi_Newton.md", "Stochastic Gradient Descent" => "solvers/stochastic_gradient_descent.md", "Subgradient method" => "solvers/subgradient.md", diff --git a/docs/src/about.md b/docs/src/about.md index f35f57d849..e76b065f23 100644 --- a/docs/src/about.md +++ b/docs/src/about.md @@ -7,6 +7,7 @@ The following people contributed * [Constantin Ahlmann-Eltze](https://const-ae.name) implemented the [gradient and differential `check` functions](helpers/checks.md) * [Renée Dornig](https://github.com/r-dornig) implemented the [particle swarm](solvers/particle_swarm.md), the [Riemannian Augmented Lagrangian Method](solvers/augmented_Lagrangian_method.md), the [Exact Penalty Method](solvers/exact_penalty_method.md), as well as the [`NonmonotoneLinesearch`](@ref) * [Willem Diepeveen](https://www.maths.cam.ac.uk/person/wd292) implemented the [primal-dual Riemannian semismooth Newton](solvers/primal_dual_semismooth_Newton.md) solver. +* [Hajg Jasa](https://www.ntnu.edu/employees/hajg.jasa) implemented the [convex bundle method](solvers/convex_bundle_method.md) and the [proximal bundle method](solvers/proximal_bundle_method.md). * Even Stephansen Kjemsås contributed to the implementation of the [Frank Wolfe Method](solvers/FrankWolfe.md) solver * Mathias Ravn Munkvold contributed most of the implementation of the [Adaptive Regularization with Cubics](solvers/adaptive-regularization-with-cubics.md) solver * [Tom-Christian Riemer](https://www.tu-chemnitz.de/mathematik/wire/mitarbeiter.php) implemented the [trust regions](solvers/trust_regions.md) and [quasi Newton](solvers/quasi_Newton.md) solvers. 
diff --git a/docs/src/references.bib b/docs/src/references.bib index 18405c1c1b..ec831f011d 100644 --- a/docs/src/references.bib +++ b/docs/src/references.bib @@ -166,6 +166,15 @@ @article{BergmannHerzogSilvaLouzeiroTenbrinckVidalNunez:2021 VOLUME = {21}, } +@article{BergmannHerzogJasa:2024, + AUTHOR = {Bergmann, Ronny and Herzog, Roland and Jasa, Hajg}, + JOURNAL = {preprint}, + EPRINT = {2402.13670}, + EPRINTTYPE = {arXiv}, + TITLE = {The Riemannian Convex Bundle Method}, + YEAR = {2024}, +} + @article{BergmannLausSteidlWeinmann:2014:1, AUTHOR = {Bergmann, Ronny and Laus, Friederike and Steidl, Gabriele and Weinmann, Andreas}, EPRINT = {1405.5349}, @@ -415,6 +424,17 @@ @article{HestenesStiefel:1952 VOLUME = {49}, YEAR = {1952} } +@article{HoseiniMonjeziNobakhtianPouryayevali:2021, + AUTHOR = {Hoseini Monjezi, Najmeh and Nobakhtian, Soghra and Pouryayevali, Mohamad Reza}, + DOI = {10.1093/imanum/drab091}, + JOURNAL = {IMA Journal of Numerical Analysis}, + NUMBER = {1}, + PAGES = {293–325}, + PUBLISHER = {Oxford University Press (OUP)}, + TITLE = {A proximal bundle algorithm for nonsmooth optimization on Riemannian manifolds}, + VOLUME = {43}, + YEAR = {2023}, +} @phdthesis{Huang:2014, AUTHOR = {Huang, W.}, SCHOOL = {Flordia State University}, diff --git a/docs/src/solvers/convex_bundle_method.md b/docs/src/solvers/convex_bundle_method.md new file mode 100644 index 0000000000..eff7a768e2 --- /dev/null +++ b/docs/src/solvers/convex_bundle_method.md @@ -0,0 +1,44 @@ +# [Convex Bundle Method](@id ConvexBundleMethodSolver) + +```@meta +CurrentModule = Manopt +``` + +```@docs +convex_bundle_method +convex_bundle_method! +``` + +## State + +```@docs +ConvexBundleMethodState +``` + +## Stopping Criteria +```@docs +StopWhenLagrangeMultiplierLess +``` + +## Debug Functions + +```@docs +DebugWarnIfLagrangeMultiplierIncreases +``` + +## Helpers and internal functions + +```@docs +convex_bundle_method_subsolver +sectional_curvature +ζ_1 +ζ_2 +close_point +``` + +## Literature + +```@bibliography +Pages = ["convex_bundle_method.md"] +Canonical=false +``` diff --git a/docs/src/solvers/index.md b/docs/src/solvers/index.md index 0aa7e4fd62..78fed17003 100644 --- a/docs/src/solvers/index.md +++ b/docs/src/solvers/index.md @@ -18,6 +18,7 @@ The following algorithms are currently available [Augmented Lagrangian Method](augmented_Lagrangian_method.md) | [`augmented_Lagrangian_method`](@ref), [`AugmentedLagrangianMethodState`](@ref) | ``f``, ``\operatorname{grad} f``, ``g``, ``\operatorname{grad} g_i``, ``h``, ``\operatorname{grad} h_j`` | [Chambolle-Pock](ChambollePock.md) | [`ChambollePock`](@ref), [`ChambollePockState`](@ref) (using [`TwoManifoldProblem`](@ref)) | ``f=F+G(Λ\cdot)``, ``\operatorname{prox}_{σ F}``, ``\operatorname{prox}_{τ G^*}``, ``Λ`` | [Conjugate Gradient Descent](conjugate_gradient_descent.md) | [`conjugate_gradient_descent`](@ref), [`ConjugateGradientDescentState`](@ref) | ``f``, ``\operatorname{grad} f`` +[Convex Bundle Method](convex_bundle_method.md) | [`convex_bundle_method`](@ref), [`ConvexBundleMethodState`](@ref) | ``f``, ``\partial f`` [Cyclic Proximal Point](cyclic_proximal_point.md) | [`cyclic_proximal_point`](@ref), [`CyclicProximalPointState`](@ref) | ``f=\sum f_i``, ``\operatorname{prox}_{\lambda f_i}`` | [Difference of Convex Algorithm](@ref solver-difference-of-convex) | [`difference_of_convex_algorithm`](@ref), [`DifferenceOfConvexState`](@ref) | ``f=g-h``, ``∂h``, and for example ``g``, ``\operatorname{grad} g`` | [Difference of Convex Proximal Point](@ref 
solver-difference-of-convex-proximal-point) | [`difference_of_convex_proximal_point`](@ref), [`DifferenceOfConvexProximalState`](@ref) | ``f=g-h``, ``∂h``, and for example ``g``, ``\operatorname{grad} g`` | @@ -29,6 +30,7 @@ The following algorithms are currently available [Nelder-Mead](NelderMead.md) | [`NelderMead`](@ref), [`NelderMeadState`](@ref) | ``f`` [Particle Swarm](particle_swarm.md) | [`particle_swarm`](@ref), [`ParticleSwarmState`](@ref) | ``f`` | [Primal-dual Riemannian semismooth Newton Algorithm](@ref solver-pdrssn) | [`primal_dual_semismooth_Newton`](@ref), [`PrimalDualSemismoothNewtonState`](@ref) (using [`TwoManifoldProblem`](@ref)) | ``f=F+G(Λ\cdot)``, ``\operatorname{prox}_{σ F}`` & diff., ``\operatorname{prox}_{τ G^*}`` & diff., ``Λ`` +[Proximal Bundle Method](proximal_bundle_method.md) | [`proximal_bundle_method`](@ref), [`ProximalBundleMethodState`](@ref) | ``f``, ``\partial f`` [Quasi-Newton Method](quasi_Newton.md) | [`quasi_Newton`](@ref), [`QuasiNewtonState`](@ref) | ``f``, ``\operatorname{grad} f`` | [Steihaug-Toint Truncated Conjugate-Gradient Method](@ref tCG) | [`truncated_conjugate_gradient_descent`](@ref), [`TruncatedConjugateGradientState`](@ref) | ``f``, ``\operatorname{grad} f``, ``\operatorname{Hess} f`` | [Subgradient Method](subgradient.md) | [`subgradient_method`](@ref), [`SubGradientMethodState`](@ref) | ``f``, ``∂ f`` | diff --git a/docs/src/solvers/proximal_bundle_method.md b/docs/src/solvers/proximal_bundle_method.md new file mode 100644 index 0000000000..6a00801036 --- /dev/null +++ b/docs/src/solvers/proximal_bundle_method.md @@ -0,0 +1,29 @@ +# [Proximal Bundle Method](@id ProxBundleMethodSolver) + +```@meta +CurrentModule = Manopt +``` + +```@docs +proximal_bundle_method +proximal_bundle_method! +``` + +## State + +```@docs +ProximalBundleMethodState +``` + +## Helpers and internal functions + +```@docs +proximal_bundle_method_subsolver +``` + +## Literature + +```@bibliography +Pages = ["proximal_bundle_method.md"] +Canonical=false +``` diff --git a/ext/ManoptRipQPQuadraticModelsExt.jl b/ext/ManoptRipQPQuadraticModelsExt.jl new file mode 100644 index 0000000000..e70c4bc2e0 --- /dev/null +++ b/ext/ManoptRipQPQuadraticModelsExt.jl @@ -0,0 +1,87 @@ +module ManoptRipQPQuadraticModelsExt + +using Manopt +import Manopt: + convex_bundle_method_subsolver, + convex_bundle_method_subsolver!, + proximal_bundle_method_subsolver, + proximal_bundle_method_subsolver! 
+using ManifoldsBase +using LinearAlgebra: tril +using SparseArrays: sparse + +if isdefined(Base, :get_extension) + using QuadraticModels: QuadraticModel + using RipQP: ripqp +else + # imports need to be relative for Requires.jl-based workflows: + # https://github.com/JuliaArrays/ArrayInterface.jl/pull/387 + using ..QuadraticModels: QuadraticModel + using ..RipQP: ripqp +end + +function convex_bundle_method_subsolver( + M::A, p_last_serious, linearization_errors, transported_subgradients +) where {A<:AbstractManifold} + d = length(linearization_errors) + λ = zeros(d) + convex_bundle_method_subsolver!( + M, λ, p_last_serious, linearization_errors, transported_subgradients + ) + return λ +end +function convex_bundle_method_subsolver!( + M::A, λ, p_last_serious, linearization_errors, transported_subgradients +) where {A<:AbstractManifold} + d = length(linearization_errors) + H = [ + inner(M, p_last_serious, X, Y) for X in transported_subgradients, + Y in transported_subgradients + ] + qm = QuadraticModel( + linearization_errors, + sparse(tril(H)); + A=reshape(ones(d), 1, d), + lcon=[one(eltype(linearization_errors))], + ucon=[one(eltype(linearization_errors))], + lvar=zeros(d), + uvar=[Inf for i in 1:d], + c0=zero(eltype(linearization_errors)), + ) + λ .= ripqp(qm; display=false).solution + return λ +end + +function proximal_bundle_method_subsolver( + M::A, p_last_serious, μ, approximation_errors, transported_subgradients +) where {A<:AbstractManifold} + d = length(approximation_errors) + λ = zeros(d) + proximal_bundle_method_subsolver!( + M, λ, p_last_serious, μ, approximation_errors, transported_subgradients + ) + return λ +end +function proximal_bundle_method_subsolver!( + M::A, λ, p_last_serious, μ, approximation_errors, transported_subgradients +) where {A<:AbstractManifold} + d = length(approximation_errors) + H = + 1 / μ * [ + inner(M, p_last_serious, X, Y) for X in transported_subgradients, + Y in transported_subgradients + ] + qm = QuadraticModel( + approximation_errors, + sparse(tril(H)); + A=reshape(ones(d), 1, d), + lcon=[one(eltype(approximation_errors))], + ucon=[one(eltype(approximation_errors))], + lvar=zeros(d), + uvar=[Inf for i in 1:d], + c0=zero(eltype(approximation_errors)), + ) + λ .= ripqp(qm; display=false).solution + return λ +end +end diff --git a/src/Manopt.jl b/src/Manopt.jl index adaa2b6e77..e4c26d4bbc 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -106,6 +106,8 @@ using ManifoldsBase: project, project!, rand!, + riemann_tensor, + riemann_tensor!, representation_size, requires_caching, retract, @@ -137,6 +139,7 @@ include("solvers/solver.jl") include("solvers/adaptive_regularization_with_cubics.jl") include("solvers/alternating_gradient_descent.jl") include("solvers/augmented_Lagrangian_method.jl") +include("solvers/convex_bundle_method.jl") include("solvers/ChambollePock.jl") include("solvers/conjugate_gradient_descent.jl") include("solvers/cyclic_proximal_point.jl") @@ -151,6 +154,7 @@ include("solvers/gradient_descent.jl") include("solvers/LevenbergMarquardt.jl") include("solvers/particle_swarm.jl") include("solvers/primal_dual_semismooth_Newton.jl") +include("solvers/proximal_bundle_method.jl") include("solvers/quasi_Newton.jl") include("solvers/truncated_conjugate_gradient_descent.jl") include("solvers/trust_regions.jl") @@ -200,6 +204,27 @@ Shape of an `Array{T,N}` of size `size`. 
global JuMP_ArrayShape function __init__() + # + # Error Hints + # + @static if isdefined(Base.Experimental, :register_error_hint) + Base.Experimental.register_error_hint(MethodError) do io, exc, argtypes, kwargs + if exc.f === convex_bundle_method_subsolver + print( + io, + "\nThe `convex_bundle_method_subsolver` has to be implemented. A default is currently available when loading QuadraticModels.jl and RipQP.jl. That is\n", + ) + printstyled(io, "`using QuadraticModels, RipQP`"; color=:cyan) + end + if exc.f === proximal_bundle_method_subsolver + print( + io, + "\nThe `proximal_bundle_method_subsolver` has to be implemented. A default is currently available when loading QuadraticModels.jl and RipQP.jl. That is\n", + ) + printstyled(io, "`using QuadraticModels, RipQP`"; color=:cyan) + end + end + end # # Requires fallback for Julia < 1.9 # @@ -219,7 +244,13 @@ function __init__() @require LRUCache = "8ac3fa9e-de4c-5943-b1dc-09c6b5f20637" begin include("../ext/ManoptLRUCacheExt.jl") end + @require QuadraticModels = "f468eda6-eac5-11e8-05a5-ff9e497bcd19" begin + @require RipQP = "1e40b3f8-35eb-4cd8-8edd-3e515bb9de08" begin + include("../ext/ManoptRipQPQuadraticModelsExt.jl") + end + end end + return nothing end # @@ -266,6 +297,7 @@ export AbstractGradientSolverState, AdaptiveRegularizationState, AlternatingGradientDescentState, AugmentedLagrangianMethodState, + ConvexBundleMethodState, ChambollePockState, ConjugateGradientDescentState, CyclicProximalPointState, @@ -280,6 +312,7 @@ export AbstractGradientSolverState, NelderMeadState, ParticleSwarmState, PrimalDualSemismoothNewtonState, + ProximalBundleMethodState, RecordSolverState, StochasticGradientDescentState, SubGradientMethodState, @@ -378,6 +411,8 @@ export adaptive_regularization_with_cubics, alternating_gradient_descent!, augmented_Lagrangian_method, augmented_Lagrangian_method!, + convex_bundle_method, + convex_bundle_method!, ChambollePock, ChambollePock!, conjugate_gradient_descent, @@ -403,6 +438,8 @@ export adaptive_regularization_with_cubics, particle_swarm, particle_swarm!, primal_dual_semismooth_Newton, + proximal_bundle_method, + proximal_bundle_method!, quasi_Newton, quasi_Newton!, stochastic_gradient_descent, @@ -441,11 +478,14 @@ export StopAfter, StopWhenAny, StopWhenChangeLess, StopWhenCostLess, + StopWhenCostNaN, StopWhenCurvatureIsNegative, StopWhenEntryChangeLess, StopWhenGradientChangeLess, StopWhenGradientNormLess, StopWhenFirstOrderProgress, + StopWhenIterateNaN, + StopWhenLagrangeMultiplierLess, StopWhenModelIncreased, StopWhenPopulationConcentrated, StopWhenSmallerOrEqual, @@ -473,6 +513,7 @@ export DebugProximalParameter, DebugWarnIfCostIncreases export DebugGradient, DebugGradientNorm, DebugStepsize export DebugWhenActive, DebugWarnIfFieldNotFinite, DebugIfEntry export DebugWarnIfCostNotFinite, DebugWarnIfFieldNotFinite +export DebugWarnIfLagrangeMultiplierIncreases export DebugWarnIfGradientNormTooLarge, DebugMessages # # Records - and access functions diff --git a/src/plans/bundle_plan.jl b/src/plans/bundle_plan.jl new file mode 100644 index 0000000000..c47362369f --- /dev/null +++ b/src/plans/bundle_plan.jl @@ -0,0 +1,158 @@ +# +# Common files for bundle-based solvers +# + +function convex_bundle_method_subsolver end +function convex_bundle_method_subsolver!
end +@doc raw""" + λ = convex_bundle_method_subsolver(M, p_last_serious, linearization_errors, transported_subgradients) + convex_bundle_method_subsolver!(M, λ, p_last_serious, linearization_errors, transported_subgradients) + +solver for the subproblem of the convex bundle method +at the last serious iterate ``p_k`` given the current linearization errors ``c_j^k``, +and transported subgradients ``\mathrm{P}_{p_k←q_j} X_{q_j}``. + +The computation can also be done in-place of `λ`. + +The subproblem for the convex bundle method is +```math +\begin{align*} + \operatorname*{arg\,min}_{λ ∈ ℝ^{\lvert J_k\rvert}}& + \frac{1}{2} \Bigl\lVert \sum_{j ∈ J_k} λ_j \mathrm{P}_{p_k←q_j} X_{q_j} \Bigr\rVert^2 + + \sum_{j ∈ J_k} λ_j \, c_j^k + \\ + \text{s. t.}\quad & + \sum_{j ∈ J_k} λ_j = 1, + \quad λ_j ≥ 0 + \quad \text{for all } + j ∈ J_k, +\end{align*} +``` + +where ``J_k = \{j ∈ J_{k-1} \ | \ λ_j > 0\} \cup \{k\}``. +See [BergmannHerzogJasa:2024](@cite) for more details. + +!!! tip + A default subsolver based on [`RipQP.jl`](https://github.com/JuliaSmoothOptimizers/RipQP.jl) and [`QuadraticModels`](https://github.com/JuliaSmoothOptimizers/QuadraticModels.jl) + is available if these two packages are loaded. +""" +convex_bundle_method_subsolver( + M, p_last_serious, linearization_errors, transported_subgradients +) + +function proximal_bundle_method_subsolver end +function proximal_bundle_method_subsolver! end +@doc raw""" + λ = proximal_bundle_method_subsolver(M, p_last_serious, μ, approximation_errors, transported_subgradients) + proximal_bundle_method_subsolver!(M, λ, p_last_serious, μ, approximation_errors, transported_subgradients) + +solver for the subproblem of the proximal bundle method. + +The subproblem for the proximal bundle method is +```math +\begin{align*} + \operatorname*{arg\,min}_{λ ∈ ℝ^{\lvert L_l\rvert}} & + \frac{1}{2 \mu_l} \Bigl\lVert \sum_{j ∈ L_l} λ_j \mathrm{P}_{p_k←q_j} X_{q_j} \Bigr\rVert^2 + + \sum_{j ∈ L_l} λ_j \, c_j^k + \\ + \text{s. t.} \quad & + \sum_{j ∈ L_l} λ_j = 1, + \quad λ_j ≥ 0 + \quad \text{for all } j ∈ L_l, +\end{align*} +``` +where ``L_l = \{k\}`` if ``q_k`` is a serious iterate, and ``L_l = L_{l-1} \cup \{k\}`` otherwise. +See [HoseiniMonjeziNobakhtianPouryayevali:2021](@cite). + +!!! tip + A default subsolver based on [`RipQP.jl`](https://github.com/JuliaSmoothOptimizers/RipQP.jl) and [`QuadraticModels`](https://github.com/JuliaSmoothOptimizers/QuadraticModels.jl) + is available if these two packages are loaded. +""" +proximal_bundle_method_subsolver( + M, p_last_serious, μ, approximation_errors, transported_subgradients +) + +@doc raw""" + StopWhenLagrangeMultiplierLess <: StoppingCriterion + +Stopping criterion based on Lagrange multipliers. + +Currently these are meant for the [`convex_bundle_method`](@ref) and [`proximal_bundle_method`](@ref), +where based on the Lagrange multipliers an approximate (sub)gradient ``g`` and an error estimate ``ε`` +are computed. + +In `mode=:both` we require that both +``ε`` and ``\lvert g \rvert`` are smaller than their `tolerance`s for the [`convex_bundle_method`](@ref), +and that +``c`` and ``\lvert d \rvert`` are smaller than their `tolerance`s for the [`proximal_bundle_method`](@ref). + +In `mode=:estimate` we require that, for the [`convex_bundle_method`](@ref), +``-ξ = \lvert g \rvert^2 + ε`` is less than a given `tolerance`. +For the [`proximal_bundle_method`](@ref), the equation reads ``-ν = μ \lvert d \rvert^2 + c``. 
+ +# Constructors + + StopWhenLagrangeMultiplierLess(tolerance=1e-6; mode::Symbol=:estimate) + +Create the stopping criterion for one of the `mode`s mentioned. +Note that the tolerance can be a single number for the `:estimate` case, +but a vector of two values is required for the `:both` mode. +Here the first entry specifies the tolerance for ``ε`` (``c``), +the second the tolerance for ``\lvert g \rvert`` (``\lvert d \rvert``), respectively. +""" +mutable struct StopWhenLagrangeMultiplierLess{T<:Real,A<:AbstractVector{<:T}} <: + StoppingCriterion + tolerance::A + mode::Symbol + reason::String + at_iteration::Int + function StopWhenLagrangeMultiplierLess(tol::T; mode::Symbol=:estimate) where {T<:Real} + return new{T,Vector{T}}([tol], mode, "", 0) + end + function StopWhenLagrangeMultiplierLess( + tols::A; mode::Symbol=:estimate + ) where {T<:Real,A<:AbstractVector{<:T}} + return new{T,A}(tols, mode, "", 0) + end +end +function status_summary(sc::StopWhenLagrangeMultiplierLess) + s = length(sc.reason) > 0 ? "reached" : "not reached" + msg = "" + (sc.mode === :both) && (msg = " ε ≤ $(sc.tolerance[1]) and |g| ≤ $(sc.tolerance[2])") + (sc.mode === :estimate) && (msg = " -ξ ≤ $(sc.tolerance[1])") + return "Stopping parameter: $(msg) :\t$(s)" +end +function show(io::IO, sc::StopWhenLagrangeMultiplierLess) + return print( + io, + "StopWhenLagrangeMultiplierLess($(sc.tolerance); mode=:$(sc.mode))\n $(status_summary(sc))", + ) +end + +@doc raw""" + DebugWarnIfLagrangeMultiplierIncreases <: DebugAction + +print a warning if the Lagrange parameter based value ``-ξ`` of the bundle method increases. + +# Constructor + + DebugWarnIfLagrangeMultiplierIncreases(warn=:Once; tol=1e2) + +Initialize the warning to warning level (`:Once`) and introduce a tolerance for the test of `1e2`. + +The `warn` level can be set to `:Once` to only warn the first time the value increases, +to `:Always` to report an increase every time it happens, and it can be set to `:No` +to deactivate the warning, then this [`DebugAction`](@ref) is inactive. +All other symbols are handled as if they were `:Always`. +""" +mutable struct DebugWarnIfLagrangeMultiplierIncreases <: DebugAction + status::Symbol + old_value::Float64 + tol::Float64 + function DebugWarnIfLagrangeMultiplierIncreases(warn::Symbol=:Once; tol=1e2) + return new(warn, Float64(Inf), tol) + end +end +function show(io::IO, di::DebugWarnIfLagrangeMultiplierIncreases) + return print(io, "DebugWarnIfLagrangeMultiplierIncreases(; tol=\"$(di.tol)\")") +end diff --git a/src/plans/debug.jl index 9800683674..73e932eb34 100644 --- a/src/plans/debug.jl +++ b/src/plans/debug.jl @@ -1101,6 +1101,7 @@ Note that the Shortcut symbols should all start with a capital letter. * `:Stepsize` creates a [`DebugStepsize`](@ref) * `:WarnCost` creates a [`DebugWarnIfCostNotFinite`](@ref) * `:WarnGradient` creates a [`DebugWarnIfFieldNotFinite`](@ref) for the `::Gradient`. 
+* `:WarnBundle` creates a [`DebugWarnIfLagrangeMultiplierIncreases`](@ref) * `:Time` creates a [`DebugTime`](@ref) * `:WarningMessages`creates a [`DebugMessages`](@ref)`(:Warning)` * `:InfoMessages`creates a [`DebugMessages`](@ref)`(:Info)` @@ -1118,6 +1119,7 @@ function DebugActionFactory(d::Symbol) (d == :Iterate) && return DebugIterate() (d == :Iteration) && return DebugIteration() (d == :Stepsize) && return DebugStepsize() + (d == :WarnBundle) && return DebugWarnIfLagrangeMultiplierIncreases() (d == :WarnCost) && return DebugWarnIfCostNotFinite() (d == :WarnGradient) && return DebugWarnIfFieldNotFinite(:Gradient) (d == :Time) && return DebugTime() diff --git a/src/plans/plan.jl index 8ae5a71a19..e40540f04c 100644 --- a/src/plans/plan.jl +++ b/src/plans/plan.jl @@ -105,6 +105,7 @@ include("record.jl") include("stopping_criterion.jl") include("stepsize.jl") +include("bundle_plan.jl") include("cost_plan.jl") include("gradient_plan.jl") include("hessian_plan.jl") diff --git a/src/plans/stopping_criterion.jl index 843fe6a206..846015f463 100644 --- a/src/plans/stopping_criterion.jl +++ b/src/plans/stopping_criterion.jl @@ -623,6 +623,84 @@ function update_stopping_criterion!(c::StopWhenStepsizeLess, ::Val{:MinStepsize} return c end +""" + StopWhenCostNaN <: StoppingCriterion + +stop when the cost function of the optimization problem from within an [`AbstractManoptProblem`](@ref), that is `get_cost(p, get_iterate(o))`, becomes `NaN`. + +# Constructor + + StopWhenCostNaN() + +initialize the stopping criterion. +""" +mutable struct StopWhenCostNaN <: StoppingCriterion + reason::String + at_iteration::Int + StopWhenCostNaN() = new("", 0) +end +function (c::StopWhenCostNaN)( + p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int +) + if i == 0 # reset on init + c.reason = "" + c.at_iteration = 0 + end + if i > 0 && isnan(get_cost(p, get_iterate(s))) + c.reason = "The algorithm reached a cost function value ($(get_cost(p,get_iterate(s)))).\n" + c.at_iteration = i + return true + end + return false +end +function status_summary(c::StopWhenCostNaN) + has_stopped = length(c.reason) > 0 + s = has_stopped ? "reached" : "not reached" + return "f(x) is NaN:\t$s" +end +function show(io::IO, c::StopWhenCostNaN) + return print(io, "StopWhenCostNaN()\n $(status_summary(c))") +end + +""" + StopWhenIterateNaN <: StoppingCriterion + +stop when the iterate of the optimization problem from within an [`AbstractManoptProblem`](@ref), that is `get_iterate(o)`, contains a `NaN`. + +# Constructor + + StopWhenIterateNaN() + +initialize the stopping criterion. +""" +mutable struct StopWhenIterateNaN <: StoppingCriterion + reason::String + at_iteration::Int + StopWhenIterateNaN() = new("", 0) +end +function (c::StopWhenIterateNaN)( + p::AbstractManoptProblem, s::AbstractManoptSolverState, i::Int +) + if i == 0 # reset on init + c.reason = "" + c.at_iteration = 0 + end + if i > 0 && any(isnan.(get_iterate(s))) + c.reason = "The algorithm reached an iterate containing NaN ($(get_iterate(s))).\n" + c.at_iteration = i + return true + end + return false +end +function status_summary(c::StopWhenIterateNaN) + has_stopped = length(c.reason) > 0 + s = has_stopped ?
"reached" : "not reached" + return "f(x) is NaN:\t$s" +end +function show(io::IO, c::StopWhenIterateNaN) + return print(io, "StopWhenIterateNaN()\n $(status_summary(c))") +end + @doc raw""" StopWhenSmallerOrEqual <: StoppingCriterion diff --git a/src/solvers/NelderMead.jl b/src/solvers/NelderMead.jl index 968e8705d1..06b4bbd377 100644 --- a/src/solvers/NelderMead.jl +++ b/src/solvers/NelderMead.jl @@ -74,7 +74,7 @@ after the description # Constructors - NelderMead(M[, population::NelderMeadSimplex]; kwargs...) + NelderMeadState(M[, population::NelderMeadSimplex]; kwargs...) Construct a Nelder-Mead Option with a default population (if not provided) of set of `dimension(M)+1` random points stored in [`NelderMeadSimplex`](@ref). diff --git a/src/solvers/conjugate_gradient_descent.jl b/src/solvers/conjugate_gradient_descent.jl index 5ac07f750e..b0b842f726 100644 --- a/src/solvers/conjugate_gradient_descent.jl +++ b/src/solvers/conjugate_gradient_descent.jl @@ -59,12 +59,14 @@ They all compute ``β_k`` such that this algorithm updates the search direction ```` # Input + * `M` a manifold ``\mathcal M`` * `f` a cost function ``F:\mathcal M→ℝ`` to minimize implemented as a function `(M,p) -> v` * `grad_f` the gradient ``\operatorname{grad}F:\mathcal M → T\mathcal M`` of ``F`` implemented also as `(M,x) -> X` * `p` an initial value ``x∈\mathcal M`` # Optional + * `coefficient`: ([`ConjugateDescentCoefficient`](@ref) `<:` [`DirectionUpdateRule`](@ref)) rule to compute the descent direction update coefficient ``β_k``, as a functor, where the resulting function maps are `(amp, cgs, i) -> β` with `amp` an [`AbstractManoptProblem`](@ref), diff --git a/src/solvers/convex_bundle_method.jl b/src/solvers/convex_bundle_method.jl new file mode 100644 index 0000000000..ba461c8e16 --- /dev/null +++ b/src/solvers/convex_bundle_method.jl @@ -0,0 +1,635 @@ +@doc raw""" + sectional_curvature(M, p, X, Y) + +compute the sectional curvature of a manifold ``\mathcal M`` at a point ``p \in \mathcal M`` +on two tangent vectors at ``p``. +The formula reads +```math + + \kappa_p(X, Y) = \frac{⟨R(X, Y, Y), X⟩_p}{\lVert X \rVert^2_p \lVert Y \rVert^2_p - ⟨X, Y⟩^2_p} + +``` +where ``R(X, Y, Y)`` is the [`riemann_tensor`](https://juliamanifolds.github.io/Manifolds.jl/stable/manifolds/connection.html#ManifoldsBase.riemann_tensor-Tuple{AbstractManifold,%20Any,%20AbstractBasis}) on ``\mathcal M``. + +# Input + +* `M`: a manifold ``\mathcal M`` +* `p`: a point ``p \in \mathcal M`` +* `X`: a tangent vector ``X \in T_p \mathcal M`` +* `Y`: a tangent vector ``Y \in T_p \mathcal M`` + +""" +function sectional_curvature(M, p, X, Y) + R = riemann_tensor(M, p, X, Y, Y) + return inner(M, p, R, X) / ((norm(M, p, X)^2 * norm(M, p, Y)^2) - (inner(M, p, X, Y)^2)) +end + +@doc raw""" + sectional_curvature(M, p) + +compute the sectional curvature of a manifold ``\mathcal M`` at a point ``p \in \mathcal M`` +on two random tangent vectors at ``p`` that are orthogonal to eachother. + +# See also +[`sectional_curvature`](@ref) +""" +function sectional_curvature(M, p) + X = rand(M; vector_at=p) + Y = rand(M; vector_at=p) + Y = Y - (inner(M, p, X, Y) / norm(M, p, X)^2 * X) + return sectional_curvature(M, p, X, Y) +end + +@doc raw""" + ζ_1(ω, δ) + +compute a curvature-dependent bound. 
+The formula reads + +```math +\zeta_{1, ω}(δ) +\coloneqq +\begin{cases} + 1 & \text{if } ω ≥ 0, \\ + \sqrt{-ω} \, δ \coth(\sqrt{-ω} \, δ) & \text{if } ω < 0, +\end{cases} +``` + +where ``ω ≤ κ_p`` for all ``p ∈ \mathcal U`` is a lower bound to the sectional curvature in +a (strongly geodesically convex) bounded subset ``\mathcal U ⊆ \mathcal M`` with diameter ``δ``. +""" +function ζ_1(k_min, diameter) + (k_min < zero(k_min)) && return sqrt(-k_min) * diameter * coth(sqrt(-k_min) * diameter) + return one(k_min) +end + +@doc raw""" + ζ_2(Ω, δ) + +compute a curvature-dependent bound. +The formula reads + +```math +\zeta_{2, Ω}(δ) \coloneqq +\begin{cases} + 1 & \text{if } Ω ≤ 0,\\ + \sqrt{Ω} \, δ \cot(\sqrt{Ω} \, δ) & \text{if } Ω > 0, +\end{cases} +``` + +where ``Ω ≥ κ_p`` for all ``p ∈ \mathcal U`` is an upper bound to the sectional curvature in +a (strongly geodesically convex) bounded subset ``\mathcal U ⊆ \mathcal M`` with diameter ``δ``. +""" +function ζ_2(k_max, diameter) + (k_max > zero(k_max)) && return sqrt(k_max) * diameter * cot(sqrt(k_max) * diameter) + return one(k_max) +end + +@doc raw""" + close_point(M, p, tol; retraction_method=default_retraction_method(M, typeof(p))) + +sample a random point close to ``p ∈ \mathcal M`` within a tolerance `tol` +and a [retraction](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/retractions/). +""" +function close_point(M, p, tol; retraction_method=default_retraction_method(M, typeof(p))) + X = rand(M; vector_at=p) + X .= tol * rand() * X / norm(M, p, X) + return retract(M, p, X, retraction_method) +end + +@doc raw""" + ConvexBundleMethodState <: AbstractManoptSolverState + +stores option values for a [`convex_bundle_method`](@ref) solver. + +# Fields + +* `atol_λ`: (`eps()`) tolerance parameter for the convex coefficients in λ +* `atol_errors`: (`eps()`) tolerance parameter for the linearization errors +* `bundle`: bundle that collects each iterate with the computed subgradient at the iterate +* `bundle_cap`: (`25`) the maximal number of elements the bundle is allowed to remember +* `diameter`: (`50.0`) estimate for the diameter of the level set of the objective function at the starting point +* `domain`: (`(M, p) -> isfinite(f(M, p))`) a function that evaluates to true when the current candidate is in the domain of the objective `f`, and false otherwise, for example `domain = (M, p) -> p ∈ dom f(M, p) ? true : false` +* `g`: descent direction +* `inverse_retraction_method`: the inverse retraction to use within +* `linearization_errors`: linearization errors at the last serious step +* `m`: (`1e-3`) the parameter to test the decrease of the cost: ``f(q_{k+1}) \le f(p_k) + m \xi``. +* `p`: current candidate point +* `p_last_serious`: last serious iterate +* `retraction_method`: the retraction to use within +* `stop`: a [`StoppingCriterion`](@ref) +* `transported_subgradients`: subgradients of the bundle that are transported to `p_last_serious` +* `vector_transport_method`: the vector transport method to use within +* `X`: (`zero_vector(M, p)`) the current element from the possible subgradients at `p` that was last evaluated. 
+* `stepsize`: ([`ConstantStepsize`](@ref)`(M)`) a [`Stepsize`](@ref) +* `ε`: convex combination of the linearization errors +* `λ`: convex coefficients that solve the subproblem +* `ξ`: the stopping parameter given by ``ξ = -\lvert g\rvert^2 – ε`` +* `ϱ`: curvature-dependent bound +* `sub_problem`: ([`convex_bundle_method_subsolver`]) a function that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`, +* `sub_state`: an [`AbstractEvaluationType`](@ref) indicating whether `sub_problem` works inplace of `λ` or allocates a solution + +# Constructor + + ConvexBundleMethodState(M::AbstractManifold, p; kwargs...) + +with keywords for all fields with defaults besides `p_last_serious` which obtains the same type as `p`. + You can use e.g. `X=` to specify the type of tangent vector to use + +## Keyword arguments + +* `k_min`: lower bound on the sectional curvature of the manifold +* `k_max`: upper bound on the sectional curvature of the manifold +* `k_size`: (`100`) sample size for the estimation of the bounds on the sectional curvature of the manifold +* `p_estimate`: (`p`) the point around which to estimate the sectional curvature of the manifold +""" +mutable struct ConvexBundleMethodState{ + P, + T, + Pr, + St, + R, + A<:AbstractVector{<:R}, + B<:AbstractVector{Tuple{<:P,<:T}}, + C<:AbstractVector{T}, + D, + I, + IR<:AbstractInverseRetractionMethod, + TR<:AbstractRetractionMethod, + TS<:Stepsize, + TSC<:StoppingCriterion, + VT<:AbstractVectorTransportMethod, +} <: AbstractManoptSolverState where {R<:Real,P,T,I<:Int,Pr,St} + atol_λ::R + atol_errors::R + bundle::B + bundle_cap::I + diameter::R + domain::D + g::T + inverse_retraction_method::IR + last_stepsize::R + linearization_errors::A + m::R + p::P + p_last_serious::P + retraction_method::TR + stepsize::TS + stop::TSC + transported_subgradients::C + vector_transport_method::VT + X::T + ε::R + ξ::R + λ::A + ϱ::R + sub_problem::Pr + sub_state::St + function ConvexBundleMethodState( + M::TM, + p::P; + atol_λ::R=eps(), + atol_errors::R=eps(), + bundle_cap::Integer=25, + m::R=1e-2, + diameter::R=50.0, + domain::D,#(M, p) -> isfinite(f(M, p)), + k_min=nothing, + k_max=nothing, + k_size::Int=100, + p_estimate=p, + stepsize::S=default_stepsize(M, SubGradientMethodState), + ϱ=nothing, + inverse_retraction_method::IR=default_inverse_retraction_method(M, typeof(p)), + retraction_method::TR=default_retraction_method(M, typeof(p)), + stopping_criterion::SC=StopWhenLagrangeMultiplierLess(1e-8) | + StopAfterIteration(5000), + X::T=zero_vector(M, p), + vector_transport_method::VT=default_vector_transport_method(M, typeof(p)), + sub_problem::Pr=convex_bundle_method_subsolver, + sub_state::St=AllocatingEvaluation(), + ) where { + D, + IR<:AbstractInverseRetractionMethod, + P, + T, + Pr, + St, + TM<:AbstractManifold, + TR<:AbstractRetractionMethod, + SC<:StoppingCriterion, + S<:Stepsize, + VT<:AbstractVectorTransportMethod, + R<:Real, + } + bundle = Vector{Tuple{P,T}}() + g = zero_vector(M, p) + last_stepsize = one(R) + linearization_errors = Vector{R}() + transported_subgradients = Vector{T}() + ε = zero(R) + λ = Vector{R}() + ξ = zero(R) + if ϱ === nothing + if (k_max === nothing) + s = [ + sectional_curvature( + M, + close_point( + M, p_estimate, diameter / 2; retraction_method=retraction_method + ), + ) for _ in 1:k_size + ] + k_max = maximum(s) + end + ϱ = ζ_2(k_max, diameter) + end + return new{ + P, + T, + Pr, + St, + 
typeof(m), + typeof(linearization_errors), + typeof(bundle), + typeof(transported_subgradients), + typeof(domain), + typeof(bundle_cap), + IR, + TR, + S, + SC, + VT, + }( + atol_λ, + atol_errors, + bundle, + bundle_cap, + diameter, + domain, + g, + inverse_retraction_method, + last_stepsize, + linearization_errors, + m, + p, + copy(M, p), + retraction_method, + stepsize, + stopping_criterion, + transported_subgradients, + vector_transport_method, + X, + ε, + ξ, + λ, + ϱ, + sub_problem, + sub_state, + ) + end +end +get_iterate(bms::ConvexBundleMethodState) = bms.p_last_serious +function set_iterate!(bms::ConvexBundleMethodState, M, p) + copyto!(M, bms.p_last_serious, p) + return bms +end +get_subgradient(bms::ConvexBundleMethodState) = bms.g + +function show(io::IO, cbms::ConvexBundleMethodState) + i = get_count(cbms, :Iterations) + Iter = (i > 0) ? "After $i iterations\n" : "" + Conv = indicates_convergence(cbms.stop) ? "Yes" : "No" + s = """ + # Solver state for `Manopt.jl`s Convex Bundle Method + $Iter + ## Parameters + * tolerance parameter for the convex coefficients: $(cbms.atol_λ) + * tolerance parameter for the linearization errors: $(cbms.atol_errors) + * bundle cap size: $(cbms.bundle_cap) + * current bundle size: $(length(cbms.bundle)) + * curvature-dependent bound: $(cbms.ϱ) + * descent test parameter: $(cbms.m) + * diameter: $(cbms.diameter) + * inverse retraction: $(cbms.inverse_retraction_method) + * retraction: $(cbms.retraction_method) + * Lagrange parameter value: $(cbms.ξ) + * vector transport: $(cbms.vector_transport_method) + + ## Stopping Criterion + $(status_summary(cbms.stop)) + This indicates convergence: $Conv""" + return print(io, s) +end + +@doc raw""" + convex_bundle_method(M, f, ∂f, p) + +perform a convex bundle method ``p_{j+1} = \mathrm{retr}(p_k, -g_k)``, where ``\mathrm{retr}`` +is a retraction and + +```math +g_k = \sum_{j\in J_k} λ_j^k \mathrm{P}_{p_k←q_j}X_{q_j}, +``` + +``p_k`` is the last serious iterate, ``X_{q_j} ∈ ∂f(q_j)``, and the ``λ_j^k`` are solutions +to the quadratic subproblem provided by the [`convex_bundle_method_subsolver`](@ref). + +Though the subdifferential might be set valued, the argument `∂f` should always +return one element from the subdifferential, though not necessarily deterministically. + +For more details, see [BergmannHerzogJasa:2024](@cite). + +# Input + +* `M`: a manifold ``\mathcal M`` +* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize +* `∂f`: the subgradient ``∂f: \mathcal M → T\mathcal M`` of f + restricted to always only returning one value/element from the subdifferential. + This function can be passed as an allocation function `(M, p) -> X` or + a mutating function `(M, X, p) -> X`, see `evaluation`. +* `p`: (`rand(M)`) an initial value ``p_0 ∈ \mathcal M`` + +# Optional + +* `atol_λ`: (`eps()`) tolerance parameter for the convex coefficients in λ. +* `atol_errors`: (`eps()`) tolerance parameter for the linearization errors. +* `m`: (`1e-3`) the parameter to test the decrease of the cost: ``f(q_{k+1}) \le f(p_k) + m \xi``. +* `diameter`: (`50.0`) estimate for the diameter of the level set of the objective function at the starting point. +* `domain`: (`(M, p) -> isfinite(f(M, p))`) a function that evaluates to true when the current candidate is in the domain of the objective `f`, and false otherwise, for example `domain = (M, p) -> p ∈ dom f(M, p) ? true : false`. +* `k_min`: lower bound on the sectional curvature of the manifold. +* `k_max`: upper bound on the sectional curvature of the manifold. 
+* `k_size`: (`100`) sample size for the estimation of the bounds on the sectional curvature of the manifold if `k_min` + and `k_max` are not provided. +* `p_estimate`: (`p`) the point around which to estimate the sectional curvature of the manifold. +* `α`: (`(i) -> one(number_eltype(X)) / i`) a function for evaluating suitable stepsizes when obtaining candidate points at iteration `i`. +* `ϱ`: curvature-dependent bound. +* `evaluation`: ([`AllocatingEvaluation`](@ref)) specify whether the subgradient works by + allocation (default) form `∂f(M, q)` or [`InplaceEvaluation`](@ref) in place, i.e. is + of the form `∂f!(M, X, p)`. +* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction method to use +* `retraction`: (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use. +* `stopping_criterion`: ([`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`) a functor, see [`StoppingCriterion`](@ref), indicating when to stop +* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport method to use +* `sub_problem`: a function evaluating with new allocations that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients` + +# Output + +the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +""" +function convex_bundle_method( + M::AbstractManifold, f::TF, ∂f::TdF, p=rand(M); kwargs... +) where {TF,TdF} + p_star = copy(M, p) + return convex_bundle_method!(M, f, ∂f, p_star; kwargs...) +end +@doc raw""" + convex_bundle_method!(M, f, ∂f, p) + +perform a convex bundle method ``p_{j+1} = \mathrm{retr}(p_k, -g_k)`` in place of `p`. + +# Input + +* `M`: a manifold ``\mathcal M`` +* `f`: a cost function ``f:\mathcal M→ℝ`` to minimize +* `∂f`: the (sub)gradient ``∂f:\mathcal M→ T\mathcal M`` of `f` + restricted to always only returning one value/element from the subdifferential. + This function can be passed as an allocation function `(M, p) -> X` or + a mutating function `(M, X, p) -> X`, see `evaluation`. +* `p`: an initial value ``p_0=p ∈ \mathcal M`` + +for more details and all optional parameters, see [`convex_bundle_method`](@ref). +""" +function convex_bundle_method!( + M::AbstractManifold, + f::TF, + ∂f!!::TdF, + p; + atol_λ::R=eps(), + atol_errors::R=eps(), + bundle_cap::Int=25, + diameter::R=π / 3,# k_max -> k_max === nothing ? π/2 : (k_max ≤ zero(R) ? typemax(R) : π/3), + domain=(M, p) -> isfinite(f(M, p)), + m::R=1e-3, + k_min=nothing, + k_max=nothing, + k_size::Int=100, + p_estimate=p, + stepsize::Stepsize=DecreasingStepsize(1, 1, 0, 1, 0, :relative), + ϱ=nothing, + debug=[DebugWarnIfLagrangeMultiplierIncreases()], + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + inverse_retraction_method::IR=default_inverse_retraction_method(M, typeof(p)), + retraction_method::TRetr=default_retraction_method(M, typeof(p)), + stopping_criterion::StoppingCriterion=StopWhenAny( + StopWhenLagrangeMultiplierLess(1e-8), StopAfterIteration(5000) + ), + vector_transport_method::VTransp=default_vector_transport_method(M, typeof(p)), + sub_problem=convex_bundle_method_subsolver, + sub_state=evaluation, + kwargs..., #especially may contain debug +) where {R<:Real,TF,TdF,TRetr,IR,VTransp} + sgo = ManifoldSubgradientObjective(f, ∂f!!; evaluation=evaluation) + dsgo = decorate_objective!(M, sgo; kwargs...) 
+ mp = DefaultManoptProblem(M, dsgo) + bms = ConvexBundleMethodState( + M, + p; + atol_λ=atol_λ, + atol_errors=atol_errors, + bundle_cap=bundle_cap, + diameter=diameter, + domain=domain, + m=m, + k_min=k_min, + k_max=k_max, + k_size=k_size, + p_estimate=p_estimate, + stepsize=stepsize, + ϱ=ϱ, + inverse_retraction_method=inverse_retraction_method, + retraction_method=retraction_method, + stopping_criterion=stopping_criterion, + vector_transport_method=vector_transport_method, + sub_problem=sub_problem, + sub_state=sub_state, + ) + bms = decorate_state!(bms; debug=debug, kwargs...) + return get_solver_return(solve!(mp, bms)) +end + +function initialize_solver!( + mp::AbstractManoptProblem, bms::ConvexBundleMethodState{P,T,Pr,St,R} +) where {P,T,Pr,St,R} + M = get_manifold(mp) + copyto!(M, bms.p_last_serious, bms.p) + get_subgradient!(mp, bms.X, bms.p) + copyto!(M, bms.g, bms.p_last_serious, bms.X) + empty!(bms.bundle) + push!(bms.bundle, (copy(M, bms.p), copy(M, bms.p, bms.X))) + empty!(bms.λ) + push!(bms.λ, zero(R)) + empty!(bms.linearization_errors) + push!(bms.linearization_errors, zero(R)) + empty!(bms.transported_subgradients) + push!(bms.transported_subgradients, zero_vector(M, bms.p)) + return bms +end +function step_solver!(mp::AbstractManoptProblem, bms::ConvexBundleMethodState, i) + M = get_manifold(mp) + # Refactor to inplace + for (j, (qj, Xj)) in enumerate(bms.bundle) + vector_transport_to!( + M, + bms.transported_subgradients[j], + qj, + Xj, + bms.p_last_serious, + bms.vector_transport_method, + ) + end + _convex_bundle_subsolver!(M, bms) + bms.g .= sum(bms.λ .* bms.transported_subgradients) + bms.ε = sum(bms.λ .* bms.linearization_errors) + bms.ξ = (-norm(M, bms.p_last_serious, bms.g)^2) - (bms.ε) + j = 1 + step = get_stepsize(mp, bms, j) + retract!(M, bms.p, bms.p_last_serious, -step * bms.g, bms.retraction_method) + while !bms.domain(M, bms.p) || + distance(M, bms.p, bms.p_last_serious) < step * norm(M, bms.p_last_serious, bms.g) + j += 1 + step = get_stepsize(mp, bms, j) + retract!(M, bms.p, bms.p_last_serious, -step * bms.g, bms.retraction_method) + end + bms.last_stepsize = step + get_subgradient!(mp, bms.X, bms.p) + if get_cost(mp, bms.p) ≤ (get_cost(mp, bms.p_last_serious) + bms.m * bms.ξ) + copyto!(M, bms.p_last_serious, bms.p) + end + v = findall(λj -> λj ≤ bms.atol_λ, bms.λ) + if !isempty(v) + deleteat!(bms.bundle, v) + # Update sizes of subgradient and lambda linearization errors as well + deleteat!(bms.λ, v) + deleteat!(bms.linearization_errors, v) + deleteat!(bms.transported_subgradients, v) + end + l = length(bms.bundle) + if l == bms.bundle_cap + # + deleteat!(bms.bundle, 1) + deleteat!(bms.λ, 1) + deleteat!(bms.linearization_errors, 1) + push!(bms.bundle, (copy(M, bms.p), copy(M, bms.p, bms.X))) + push!(bms.linearization_errors, 0.0) + push!(bms.λ, 0.0) + else + # push to bundle and update subgradients, λ and linearization_errors (+1 in length) + push!(bms.bundle, (copy(M, bms.p), copy(M, bms.p, bms.X))) + push!(bms.linearization_errors, 0.0) + push!(bms.λ, 0.0) + push!(bms.transported_subgradients, zero_vector(M, bms.p)) + end + for (j, (qj, Xj)) in enumerate(bms.bundle) + v = + get_cost(mp, bms.p_last_serious) - get_cost(mp, qj) - ( + bms.ϱ * inner( + M, + qj, + Xj, + inverse_retract( + M, qj, bms.p_last_serious, bms.inverse_retraction_method + ), + ) + ) + bms.linearization_errors[j] = (0 ≥ v ≥ -bms.atol_errors) ? 
0 : v + end + return bms +end +get_solver_result(bms::ConvexBundleMethodState) = bms.p_last_serious +get_last_stepsize(bms::ConvexBundleMethodState) = bms.last_stepsize + +# +# +# Dispatching on different types of subsolvers +# (a) closed form allocating +function _convex_bundle_subsolver!( + M, bms::ConvexBundleMethodState{P,T,F,AllocatingEvaluation} +) where {P,T,F} + bms.λ = bms.sub_problem( + M, bms.p_last_serious, bms.linearization_errors, bms.transported_subgradients + ) + return bms +end +# (b) closed form in-place +function _convex_bundle_subsolver!( + M, bms::ConvexBundleMethodState{P,T,F,InplaceEvaluation} +) where {P,T,F} + bms.sub_problem( + M, bms.λ, bms.p_last_serious, bms.linearization_errors, bms.transported_subgradients + ) + return bms +end +# (c) if necessary one could implement the case where we have problem and state and call solve! + +# +# Lagrange stopping criterion +function (sc::StopWhenLagrangeMultiplierLess)( + mp::AbstractManoptProblem, bms::ConvexBundleMethodState, i::Int +) + if i == 0 # reset on init + sc.reason = "" + sc.at_iteration = 0 + end + M = get_manifold(mp) + if (sc.mode == :estimate) && (-bms.ξ ≤ sc.tolerance[1]) && (i > 0) + sc.reason = "After $i iterations the algorithm reached an approximate critical point: the parameter -ξ = $(-bms.ξ) ≤ $(sc.tolerance[1]).\n" + sc.at_iteration = i + return true + end + ng = norm(M, bms.p_last_serious, bms.g) + if (sc.mode == :both) && (bms.ε ≤ sc.tolerance[1]) && (ng ≤ sc.tolerance[2]) && (i > 0) + sc.reason = "After $i iterations the algorithm reached an approximate critical point: the parameter ε = $(bms.ε) ≤ $(sc.tolerance[1]) and |g| = $(ng) ≤ $(sc.tolerance[2]).\n" + sc.at_iteration = i + return true + end + return false +end + +function (d::DebugWarnIfLagrangeMultiplierIncreases)( + ::AbstractManoptProblem, st::ConvexBundleMethodState, i::Int +) + (i < 1) && (return nothing) + if d.status !== :No + new_value = -st.ξ + if new_value ≥ d.old_value * d.tol + @warn """The Lagrange multiplier increased by at least a factor of $(d.tol). + At iteration #$i the negative of the Lagrange multiplier, -ξ, increased from $(d.old_value) to $(new_value).\n + Consider decreasing either the `diameter` keyword argument, or one + of the parameters involved in the estimation of the sectional curvature, such as `k_min`, + `k_max`, or `ϱ` in the `convex_bundle_method` call. + """ + if d.status === :Once + @warn "Further warnings will be suppressed, use DebugWarnIfLagrangeMultiplierIncreases(:Always) to get all warnings." + d.status = :No + end + elseif new_value < zero(number_eltype(st.ξ)) + @warn """The Lagrange multiplier is positive. + At iteration #$i the negative of the Lagrange multiplier, -ξ, became negative.\n + Consider increasing either the `diameter` keyword argument, or changing + one of the parameters involved in the estimation of the sectional curvature, such as `k_min`, + `k_max`, or `ϱ` in the `convex_bundle_method` call. 
+ """ + else + d.old_value = min(d.old_value, new_value) + end + end + return nothing +end + +function (d::DebugStepsize)( + ::P, bms::ConvexBundleMethodState, i::Int +) where {P<:AbstractManoptProblem} + (i < 1) && return nothing + Printf.format(d.io, Printf.Format(d.format), get_last_stepsize(bms)) + return nothing +end diff --git a/src/solvers/proximal_bundle_method.jl b/src/solvers/proximal_bundle_method.jl new file mode 100644 index 0000000000..49581b4fdf --- /dev/null +++ b/src/solvers/proximal_bundle_method.jl @@ -0,0 +1,502 @@ +@doc raw""" + ProximalBundleMethodState <: AbstractManoptSolverState + +stores option values for a [`proximal_bundle_method`](@ref) solver. + +# Fields + +* `approx_errors`: approximation of the linearization errors at the last serious step +* `bundle`: bundle that collects each iterate with the computed subgradient at the iterate +* `bundle_size`: (`50`) the maximal size of the bundle +* `c`: convex combination of the approximation errors +* `d`: descent direction +* `inverse_retraction_method`: the inverse retraction to use within +* `m`: (`0.0125`) the parameter to test the decrease of the cost +* `p`: current candidate point +* `p_last_serious`: last serious iterate +* `retraction_method`: the retraction to use within +* `stop`: a [`StoppingCriterion`](@ref) +* `transported_subgradients`: subgradients of the bundle that are transported to p_last_serious +* `vector_transport_method`: the vector transport method to use within +* `X`: (`zero_vector(M, p)`) the current element from the possible subgradients + at `p` that was last evaluated. +* `α₀`: (`1.2`) initalization value for `α`, used to update `η` +* `α`: curvature-dependent parameter used to update `η` +* `ε`: (`1e-2`) stepsize-like parameter related to the injectivity radius of the manifold +* `δ`: parameter for updating `μ`: if ``δ < 0`` then ``μ = \log(i + 1)``, else ``μ += δ μ`` +* `η`: curvature-dependent term for updating the approximation errors +* `λ`: convex coefficients that solve the subproblem +* `μ`: (`0.5`) (initial) proximal parameter for the subproblem +* `ν`: the stopping parameter given by ``ν = - μ |d|^2 - c`` +* `sub_problem`: a function evaluating with new allocations that solves the sub problem on `M` given the last serious iterate `p_last_serious`, the linearization errors `linearization_errors`, and the transported subgradients `transported_subgradients`, + +# Constructor + +ProximalBundleMethodState(M::AbstractManifold, p; kwargs...) + +with keywords for all fields above besides `p_last_serious` which obtains the same type as `p`. +You can use e.g. 
+
+"""
+mutable struct ProximalBundleMethodState{
+    P,
+    T,
+    Pr,
+    St,
+    IR<:AbstractInverseRetractionMethod,
+    TR<:AbstractRetractionMethod,
+    TSC<:StoppingCriterion,
+    VT<:AbstractVectorTransportMethod,
+    R<:Real,
+} <: AbstractManoptSolverState where {P,T,Pr}
+    approx_errors::AbstractVector{R}
+    bundle::AbstractVector{Tuple{P,T}}
+    c::R
+    d::T
+    inverse_retraction_method::IR
+    lin_errors::AbstractVector{R}
+    m::R
+    p::P
+    p_last_serious::P
+    retraction_method::TR
+    bundle_size::Integer
+    stop::TSC
+    transported_subgradients::AbstractVector{T}
+    vector_transport_method::VT
+    X::T
+    α::R
+    α₀::R
+    ε::R
+    δ::R
+    η::R
+    λ::AbstractVector{R}
+    μ::R
+    ν::R
+    sub_problem::Pr
+    sub_state::St
+    function ProximalBundleMethodState(
+        M::TM,
+        p::P;
+        m::R=0.0125,
+        inverse_retraction_method::IR=default_inverse_retraction_method(M, typeof(p)),
+        retraction_method::TR=default_retraction_method(M, typeof(p)),
+        stopping_criterion::SC=StopWhenLagrangeMultiplierLess(1e-8) |
+                               StopAfterIteration(5000),
+        bundle_size::Integer=50,
+        vector_transport_method::VT=default_vector_transport_method(M, typeof(p)),
+        X::T=zero_vector(M, p),
+        α₀::R=1.2,
+        ε::R=1e-2,
+        δ::R=1.0,
+        μ::R=0.5,
+        sub_problem::Pr=proximal_bundle_method_subsolver,
+        sub_state::St=AllocatingEvaluation(),
+    ) where {
+        P,
+        T,
+        Pr,
+        St,
+        R<:Real,
+        IR<:AbstractInverseRetractionMethod,
+        TM<:AbstractManifold,
+        TR<:AbstractRetractionMethod,
+        SC<:StoppingCriterion,
+        VT<:AbstractVectorTransportMethod,
+    }
+        # Initialize index set, bundle points, linearization errors, and stopping parameter
+        approx_errors = [zero(R)]
+        bundle = [(copy(M, p), copy(M, p, X))]
+        c = zero(R)
+        d = copy(M, p, X)
+        lin_errors = [zero(R)]
+        transported_subgradients = [copy(M, p, X)]
+        α = zero(R)
+        λ = [zero(R)]
+        η = zero(R)
+        ν = zero(R)
+        return new{P,T,Pr,St,IR,TR,SC,VT,R}(
+            approx_errors,
+            bundle,
+            c,
+            d,
+            inverse_retraction_method,
+            lin_errors,
+            m,
+            p,
+            copy(M, p),
+            retraction_method,
+            bundle_size,
+            stopping_criterion,
+            transported_subgradients,
+            vector_transport_method,
+            X,
+            α,
+            α₀,
+            ε,
+            δ,
+            η,
+            λ,
+            μ,
+            ν,
+            sub_problem,
+            sub_state,
+        )
+    end
+end
+get_iterate(pbms::ProximalBundleMethodState) = pbms.p_last_serious
+function set_iterate!(pbms::ProximalBundleMethodState, M, p)
+    copyto!(M, pbms.p_last_serious, p)
+    return pbms
+end
+get_subgradient(pbms::ProximalBundleMethodState) = pbms.d
+
+function show(io::IO, pbms::ProximalBundleMethodState)
+    i = get_count(pbms, :Iterations)
+    Iter = (i > 0) ? "After $i iterations\n" : ""
+    Conv = indicates_convergence(pbms.stop) ? "Yes" : "No"
"Yes" : "No" + s = """ + # Solver state for `Manopt.jl`s Proximal Bundle Method + $Iter + + ## Parameters + + * bundle size: $(pbms.bundle_size) + * inverse retraction: $(pbms.inverse_retraction_method) + * descent test parameter: $(pbms.m) + * retraction: $(pbms.retraction_method) + * vector transport: $(pbms.vector_transport_method) + * stopping parameter value: $(pbms.ν) + * curvature-dependent α: $(pbms.α) + * stepsize-like parameter ε: $(pbms.ε) + * update parameter for proximal parameter, δ: $(pbms.δ) + * curvature-dependent η: $(pbms.η) + * proximal parameter μ: $(pbms.μ) + + ## Stopping Criterion + $(status_summary(pbms.stop)) + This indicates convergence: $Conv""" + return print(io, s) +end + +@doc raw""" + proximal_bundle_method(M, f, ∂f, p) + +perform a proximal bundle method ``p_{j+1} = \mathrm{retr}(p_k, -d_k)``, where +```math +d_k = \frac{1}{\mu_l} \sum_{j\in J_k} λ_j^k \mathrm{P}_{p_k←q_j}X_{q_j}, +``` +where ``X_{q_j}\in∂f(q_j)``, ``\mathrm{retr}`` is a retraction, +``p_k`` is the last serious iterate, ``\mu_l`` is a proximal parameter, and the +``λ_j^k`` are solutionsto the quadratic subproblem provided by the +[`proximal_bundle_method_subsolver`](@ref). + +Though the subdifferential might be set valued, the argument `∂f` should always +return _one_ element from the subdifferential, but not necessarily deterministic. + +For more details see [HoseiniMonjeziNobakhtianPouryayevali:2021](@cite). + +# Input + +* `M`: a manifold ``\mathcal M`` +* `f`: a cost function ``F:\mathcal M → ℝ`` to minimize +* `∂f`: the (sub)gradient ``∂ f: \mathcal M → T\mathcal M`` of f + restricted to always only returning one value/element from the subdifferential. + This function can be passed as an allocation function `(M, p) -> X` or + a mutating function `(M, X, p) -> X`, see `evaluation`. +* `p`: an initial value ``p ∈ \mathcal M`` + +# Optional + +* `m`: a real number that controls the decrease of the cost function +* `evaluation` – ([`AllocatingEvaluation`](@ref)) specify whether the subgradient works by + allocation (default) form `∂f(M, q)` or [`InplaceEvaluation`](@ref) in place, i.e. is + of the form `∂f!(M, X, p)`. +* `inverse_retraction_method`: (`default_inverse_retraction_method(M, typeof(p))`) an inverse retraction method to use +* `retraction` – (`default_retraction_method(M, typeof(p))`) a `retraction(M, p, X)` to use. +* `stopping_criterion` – ([`StopWhenLagrangeMultiplierLess`](@ref)`(1e-8)`) + a functor, see[`StoppingCriterion`](@ref), indicating when to stop. +* `vector_transport_method`: (`default_vector_transport_method(M, typeof(p))`) a vector transport method to use +... +and the ones that are passed to [`decorate_state!`](@ref) for decorators. + +# Output + +the obtained (approximate) minimizer ``p^*``, see [`get_solver_return`](@ref) for details +""" +function proximal_bundle_method( + M::AbstractManifold, f::TF, ∂f::TdF, p; kwargs... +) where {TF,TdF} + p_star = copy(M, p) + return proximal_bundle_method!(M, f, ∂f, p_star; kwargs...) +end +@doc raw""" + proximal_bundle_method!(M, f, ∂f, p) + +perform a proximal bundle method ``p_{j+1} = \mathrm{retr}(p_k, -d_k)`` in place of `p` + +# Input + +* `M` – a manifold ``\mathcal M`` +* `f` – a cost function ``f:\mathcal M→ℝ`` to minimize +* `∂f`- the (sub)gradient ``\partial f:\mathcal M→ T\mathcal M`` of F + restricted to always only returning one value/element from the subdifferential. + This function can be passed as an allocation function `(M, p) -> X` or + a mutating function `(M, X, p) -> X`, see `evaluation`. 
+* `p` – an initial value ``p_0=p ∈ \mathcal M`` + +for more details and all optional parameters, see [`proximal_bundle_method`](@ref). +""" +function proximal_bundle_method!( + M::AbstractManifold, + f::TF, + ∂f!!::TdF, + p; + m=0.0125, + evaluation::AbstractEvaluationType=AllocatingEvaluation(), + inverse_retraction_method::IR=default_inverse_retraction_method(M, typeof(p)), + retraction_method::TRetr=default_retraction_method(M, typeof(p)), + bundle_size=50, + stopping_criterion::StoppingCriterion=StopWhenLagrangeMultiplierLess(1e-8) | + StopAfterIteration(5000), + vector_transport_method::VTransp=default_vector_transport_method(M, typeof(p)), + α₀=1.2, + ε=1e-2, + δ=-1.0,#0.0, + μ=0.5,#1.0, + sub_problem=proximal_bundle_method_subsolver, + sub_state=evaluation, + kwargs..., #especially may contain debug +) where {TF,TdF,TRetr,IR,VTransp} + sgo = ManifoldSubgradientObjective(f, ∂f!!; evaluation=evaluation) + dsgo = decorate_objective!(M, sgo; kwargs...) + mp = DefaultManoptProblem(M, dsgo) + pbms = ProximalBundleMethodState( + M, + p; + m=m, + inverse_retraction_method=inverse_retraction_method, + retraction_method=retraction_method, + bundle_size=bundle_size, + stopping_criterion=stopping_criterion, + vector_transport_method=vector_transport_method, + α₀=α₀, + ε=ε, + δ=δ, + μ=μ, + sub_problem=sub_problem, + sub_state=sub_state, + ) + pbms = decorate_state!(pbms; kwargs...) + return get_solver_return(solve!(mp, pbms)) +end +function initialize_solver!(mp::AbstractManoptProblem, pbms::ProximalBundleMethodState) + M = get_manifold(mp) + copyto!(M, pbms.p_last_serious, pbms.p) + get_subgradient!(mp, pbms.X, pbms.p) + pbms.bundle = [(copy(M, pbms.p), copy(M, pbms.p, pbms.X))] + empty!(pbms.λ) + push!(pbms.λ, zero(eltype(pbms.p))) + return pbms +end +function step_solver!(mp::AbstractManoptProblem, pbms::ProximalBundleMethodState, i) + M = get_manifold(mp) + pbms.transported_subgradients = [ + if qj ≈ pbms.p_last_serious + Xj + else + vector_transport_to( + M, qj, Xj, pbms.p_last_serious, pbms.vector_transport_method + ) + + pbms.η * + inverse_retract(M, pbms.p_last_serious, qj, pbms.inverse_retraction_method) + end for (qj, Xj) in pbms.bundle + ] + v = [ + -2 * ej / + norm( + M, + pbms.p_last_serious, + inverse_retract(M, pbms.p_last_serious, qj, pbms.inverse_retraction_method), + )^2 for + (ej, (qj, Xj)) in zip(pbms.lin_errors, pbms.bundle) if !(qj ≈ pbms.p_last_serious) + ] + if !isempty(v) + pbms.η = pbms.α₀ + max(pbms.α₀, pbms.α, maximum(v)) + else + pbms.η = pbms.α₀ + max(pbms.α₀, pbms.α) + end + _proximal_bundle_subsolver!(M, pbms) + pbms.c = sum(pbms.λ .* pbms.approx_errors) + pbms.d .= -1 / pbms.μ .* sum(pbms.λ .* pbms.transported_subgradients) + nd = norm(M, pbms.p_last_serious, pbms.d) + pbms.ν = -pbms.μ * norm(M, pbms.p_last_serious, pbms.d)^2 - pbms.c + if nd ≤ pbms.ε + retract!(M, pbms.p, pbms.p_last_serious, pbms.d, pbms.retraction_method) + get_subgradient!(mp, pbms.X, pbms.p) + pbms.α = 0.0 + else + retract!( + M, pbms.p, pbms.p_last_serious, pbms.ε * pbms.d / nd, pbms.retraction_method + ) + get_subgradient!(mp, pbms.X, pbms.p) + pbms.α = + -inner( + M, + pbms.p_last_serious, + pbms.d, + vector_transport_to( + M, pbms.p, pbms.X, pbms.p_last_serious, pbms.vector_transport_method + ), + ) / (pbms.ε * nd) + end + if get_cost(mp, pbms.p) ≤ (get_cost(mp, pbms.p_last_serious) + pbms.m * pbms.ν) + copyto!(M, pbms.p_last_serious, pbms.p) + if pbms.δ < zero(eltype(pbms.μ)) + pbms.μ = log(i + 1) + else + pbms.μ += pbms.δ * pbms.μ + end + pbms.bundle = [(copy(M, pbms.p), copy(M, 
pbms.p, pbms.X))] + pbms.lin_errors = [0.0] + pbms.approx_errors = [0.0] + else + push!(pbms.bundle, (copy(M, pbms.p), copy(M, pbms.p, pbms.X))) + push!( + pbms.lin_errors, + get_cost(mp, pbms.p_last_serious) - get_cost(mp, pbms.p) + inner( + M, + pbms.p_last_serious, + vector_transport_to( + M, pbms.p, pbms.X, pbms.p_last_serious, pbms.vector_transport_method + ), + inverse_retract( + M, pbms.p_last_serious, pbms.p, pbms.inverse_retraction_method + ), + ), + ) + push!( + pbms.approx_errors, + get_cost(mp, pbms.p_last_serious) - get_cost(mp, pbms.p) + + inner( + M, + pbms.p_last_serious, + vector_transport_to( + M, pbms.p, pbms.X, pbms.p_last_serious, pbms.vector_transport_method + ), + inverse_retract( + M, pbms.p_last_serious, pbms.p, pbms.inverse_retraction_method + ), + ) + + pbms.η / 2 * + norm( + M, + pbms.p_last_serious, + inverse_retract( + M, pbms.p_last_serious, pbms.p, pbms.inverse_retraction_method + ), + )^2, + ) + if length(pbms.bundle) == pbms.bundle_size + deleteat!(pbms.bundle, 1) + deleteat!(pbms.lin_errors, 1) + deleteat!(pbms.approx_errors, 1) + end + end + return pbms +end +get_solver_result(pbms::ProximalBundleMethodState) = pbms.p_last_serious + +# +# +# Dispatching on different types of subsolvers +# (a) closed form allocating +function _proximal_bundle_subsolver!( + M, pbms::ProximalBundleMethodState{P,T,F,AllocatingEvaluation} +) where {P,T,F} + pbms.λ = pbms.sub_problem( + M, pbms.p_last_serious, pbms.μ, pbms.approx_errors, pbms.transported_subgradients + ) + return pbms +end +# (b) closed form in-place +function _proximal_bundle_subsolver!( + M, pbms::ProximalBundleMethodState{P,T,F,InplaceEvaluation} +) where {P,T,F} + pbms.sub_problem( + M, + pbms.λ, + pbms.p_last_serious, + pbms.μ, + pbms.approx_errors, + pbms.transported_subgradients, + ) + return pbms +end + +function (sc::StopWhenLagrangeMultiplierLess)( + mp::AbstractManoptProblem, pbms::ProximalBundleMethodState, i::Int +) + if i == 0 # reset on init + sc.reason = "" + sc.at_iteration = 0 + end + M = get_manifold(mp) + if (sc.mode == :estimate) && (-pbms.ν ≤ sc.tolerance[1]) && (i > 0) + sc.reason = "After $i iterations the algorithm reached an approximate critical point: the parameter -ν = $(-pbms.ν) ≤ $(sc.tolerance[1]).\n" + sc.at_iteration = i + return true + end + nd = norm(M, pbms.p_last_serious, pbms.d) + if (sc.mode == :both) && (pbms.c ≤ sc.tolerance[1]) && (nd ≤ sc.tolerance[2]) && (i > 0) + sc.reason = "After $i iterations the algorithm reached an approximate critical point: the parameter c = $(pbms.c) ≤ $(sc.tolerance[1]) and |d| = $(nd) ≤ $(sc.tolerance[2]).\n" + sc.at_iteration = i + return true + end + return false +end + +@doc raw""" + DebugWarnIfLagrangeMultiplierIncreases <: DebugAction + +print a warning if the stopping parameter of the bundle method increases. + +# Constructor + DebugWarnIfLagrangeMultiplierIncreases(warn=:Once; tol=1e2) + +Initialize the warning to warning level (`:Once`) and introduce a tolerance for the test of `1e2`. + +The `warn` level can be set to `:Once` to only warn the first time the cost increases, +to `:Always` to report an increase every time it happens, and it can be set to `:No` +to deactivate the warning, then this [`DebugAction`](@ref) is inactive. 
+All other symbols are handled as if they were `:Always`.
+"""
+function (d::DebugWarnIfLagrangeMultiplierIncreases)(
+    ::AbstractManoptProblem, st::ProximalBundleMethodState, i::Int
+)
+    (i < 1) && (return nothing)
+    if d.status !== :No
+        new_value = -st.ν
+        if new_value ≥ d.old_value * d.tol
+            @warn """The stopping parameter increased by at least $(d.tol).
+            At iteration #$i the stopping parameter -ν increased from $(d.old_value) to $(new_value).\n
+            Consider changing either the initial proximal parameter `μ`, its update coefficient `δ`, or
+            the stepsize-like parameter `ε` related to the injectivity radius of the manifold in the
+            `proximal_bundle_method` call.
+            """
+            if d.status === :Once
+                @warn "Further warnings will be suppressed, use DebugWarnIfLagrangeMultiplierIncreases(:Always) to get all warnings."
+                d.status = :No
+            end
+        elseif new_value < zero(number_eltype(st.ν))
+            @warn """The stopping parameter is negative.
+            At iteration #$i the stopping parameter -ν became negative.\n
+            Consider changing either the initial proximal parameter `μ`, its update coefficient `δ`, or
+            the stepsize-like parameter `ε` related to the injectivity radius of the manifold in the
+            `proximal_bundle_method` call.
+            """
+        else
+            d.old_value = min(d.old_value, new_value)
+        end
+    end
+    return nothing
+end
diff --git a/src/solvers/subgradient.jl b/src/solvers/subgradient.jl
index 78fb3ae7ad..ac4d81cfc7 100644
--- a/src/solvers/subgradient.jl
+++ b/src/solvers/subgradient.jl
@@ -89,7 +89,7 @@ where ``\mathrm{retr}`` is a retraction, ``s_k`` is a step size, usually the
 Though the subgradient might be set valued, the argument `∂f` should always
 return _one_ element from the subgradient, but not necessarily deterministic.
 
-For more detils see [FerreiraOliveira:1998](@cite).
+For more details see [FerreiraOliveira:1998](@cite).
# Input diff --git a/test/plans/test_conjugate_gradient_plan.jl b/test/plans/test_conjugate_gradient_plan.jl index 89931ce1b5..4e270cc5bd 100644 --- a/test/plans/test_conjugate_gradient_plan.jl +++ b/test/plans/test_conjugate_gradient_plan.jl @@ -16,12 +16,20 @@ Manopt.update_rule_storage_vectors(::DummyCGCoeff) = Tuple{} p0 = [1.0, 0.0] pr = DefaultManoptProblem(M, ManifoldGradientObjective(f, grad_f)) cgs2 = ConjugateGradientDescentState( - M, p0, StopAfterIteration(2), ConstantStepsize(1.0), dur2 + M, + p0; + stopping_criterion=StopAfterIteration(2), + stepsize=ConstantStepsize(1.0), + coefficient=dur2, ) cgs2.X = [0.0, 0.2] @test cgs2.coefficient(pr, cgs2, 1) != 0 cgs3 = ConjugateGradientDescentState( - M, p0, StopAfterIteration(2), ConstantStepsize(1.0), dur3 + M, + p0; + stopping_criterion=StopAfterIteration(2), + stepsize=ConstantStepsize(1.0), + coefficient=dur3, ) cgs3.X = [0.0, 0.2] @test cgs3.coefficient(pr, cgs3, 1) == 0 diff --git a/test/plans/test_debug.jl b/test/plans/test_debug.jl index c7a989cc55..1205d03ff6 100644 --- a/test/plans/test_debug.jl +++ b/test/plans/test_debug.jl @@ -255,6 +255,8 @@ Manopt.get_message(::TestMessageState) = "DebugTest" @test isa(df1[:All].group[1], DebugWarnIfCostNotFinite) df2 = DebugFactory([:WarnGradient]) @test isa(df2[:All].group[1], DebugWarnIfFieldNotFinite) + df3 = DebugFactory([:WarnBundle]) + @test isa(df3[:All].group[1], DebugWarnIfLagrangeMultiplierIncreases) end @testset "Debug Time" begin io = IOBuffer() diff --git a/test/plans/test_record.jl b/test/plans/test_record.jl index 7452e9d877..ee83988419 100644 --- a/test/plans/test_record.jl +++ b/test/plans/test_record.jl @@ -123,7 +123,7 @@ using Manifolds, Manopt, Test, ManifoldsBase, Dates @test e.recorded_values == [1.0] # no p0 -> assume p is the first iterate dinvretr = RecordChange(; inverse_retraction_method=PolarInverseRetraction()) - dmani = RecordChange(Symplectic(2)) + dmani = RecordChange(SymplecticMatrices(2)) @test dinvretr.inverse_retraction_method === PolarInverseRetraction() @test dmani.inverse_retraction_method === CayleyInverseRetraction() @test d.inverse_retraction_method === LogarithmicInverseRetraction() diff --git a/test/plans/test_stopping_criteria.jl b/test/plans/test_stopping_criteria.jl index b735670509..68e1f6102b 100644 --- a/test/plans/test_stopping_criteria.jl +++ b/test/plans/test_stopping_criteria.jl @@ -130,6 +130,8 @@ struct DummyStoppingCriterion <: StoppingCriterion end @test swgcl(gp, gs, 1) # change 0 -> true @test endswith(Manopt.status_summary(swgcl), "reached") end + update_stopping_criterion!(swgcl2, :MinGradientChange, 1e-9) + @test swgcl2.threshold == 1e-9 end @testset "TCG stopping criteria" begin @@ -186,7 +188,6 @@ struct DummyStoppingCriterion <: StoppingCriterion end update_stopping_criterion!(swecl, :Threshold, 1e-1) @test swecl.threshold == 1e-1 end - @testset "Subgradient Norm Stopping Criterion" begin M = Euclidean(2) p = [1.0, 2.0] @@ -214,4 +215,24 @@ struct DummyStoppingCriterion <: StoppingCriterion end update_stopping_criterion!(c2, :MinSubgradNorm, 1e-8) @test c2.threshold == 1e-8 end + @testset "StopWhenCostNaN & StopWhenIterateNaN" begin + sc1 = StopWhenCostNaN() + f(M, p) = NaN + M = Euclidean(2) + p = [1.0, 2.0] + @test startswith(repr(sc1), "StopWhenCostNaN()\n") + mco = ManifoldCostObjective(f) + mp = DefaultManoptProblem(M, mco) + s = NelderMeadState(M) + @test sc1(mp, s, 1) #always returns true since `f` is always NaN + @test !sc1(mp, s, 0) # test reset + @test length(sc1.reason) == sc1.at_iteration # verify reset + 
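+        # set the state's iterate to NaN entrywise so that StopWhenIterateNaN fires below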
+ s.p .= NaN + sc2 = StopWhenIterateNaN() + @test startswith(repr(sc2), "StopWhenIterateNaN()\n") + @test sc2(mp, s, 1) #always returns true since p was now set to NaN + @test !sc2(mp, s, 0) # test reset + @test length(sc2.reason) == sc2.at_iteration # verify reset + end end diff --git a/test/runtests.jl b/test/runtests.jl index f68404b643..fbe6847d36 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -37,6 +37,7 @@ include("utils/example_tasks.jl") include("solvers/test_adaptive_regularization_with_cubics.jl") include("solvers/test_alternating_gradient.jl") include("solvers/test_augmented_lagrangian.jl") + include("solvers/test_convex_bundle_method.jl") include("solvers/test_ChambollePock.jl") include("solvers/test_conjugate_gradient.jl") include("solvers/test_difference_of_convex.jl") @@ -47,6 +48,7 @@ include("utils/example_tasks.jl") include("solvers/test_gradient_descent.jl") include("solvers/test_Levenberg_Marquardt.jl") include("solvers/test_Nelder_Mead.jl") + include("solvers/test_proximal_bundle_method.jl") include("solvers/test_quasi_Newton.jl") include("solvers/test_particle_swarm.jl") include("solvers/test_primal_dual_semismooth_Newton.jl") diff --git a/test/solvers/test_conjugate_gradient.jl b/test/solvers/test_conjugate_gradient.jl index b87179e62d..46ee8fc309 100644 --- a/test/solvers/test_conjugate_gradient.jl +++ b/test/solvers/test_conjugate_gradient.jl @@ -22,13 +22,31 @@ include("../utils/example_tasks.jl") diff = grad_2 - grad_1 dU = SteepestDirectionUpdateRule() - s1 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm, zero_vector(M, x0)) + s1 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + initial_gradient=zero_vector(M, x0), + ) @test s1.coefficient(dmp, s1, 1) == 0 @test default_stepsize(M, typeof(s1)) isa ArmijoLinesearch @test Manopt.get_message(s1) == "" dU = ConjugateDescentCoefficient() - s2 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm, zero_vector(M, x0)) + s2 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + initial_gradient=zero_vector(M, x0), + ) s2.X = grad_1 s2.δ = δ1 # the first case is zero @@ -38,7 +56,15 @@ include("../utils/example_tasks.jl") @test s2.coefficient(dmp, s2, 2) == dot(grad_2, grad_2) / dot(-δ2, grad_1) dU = DaiYuanCoefficient() - s3 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm) + s3 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + ) s3.X = grad_1 s3.δ = δ1 # the first case is zero @@ -48,7 +74,15 @@ include("../utils/example_tasks.jl") @test s3.coefficient(dmp, s3, 2) == dot(grad_2, grad_2) / dot(δ2, grad_2 - grad_1) dU = FletcherReevesCoefficient() - s4 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm) + s4 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + ) s4.X = grad_1 s4.δ = δ1 # the first case is zero @@ -58,7 +92,15 @@ include("../utils/example_tasks.jl") @test s4.coefficient(dmp, s4, 2) == dot(grad_2, grad_2) / dot(grad_1, grad_1) dU = HagerZhangCoefficient() - s5 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm) + s5 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + 
retraction_method=retr, + vector_transport_method=vtm, + ) s5.X = grad_1 s5.δ = δ1 # the first case is zero @@ -71,7 +113,15 @@ include("../utils/example_tasks.jl") dot(diff, grad_2) / denom - 2 * ndiffsq * dot(δ1, grad_2) / denom^2 dU = HestenesStiefelCoefficient() - s6 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm) + s6 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + ) s6.X = grad_1 s6.δ = δ1 @test s6.coefficient(dmp, s6, 1) == 0.0 @@ -80,7 +130,15 @@ include("../utils/example_tasks.jl") @test s6.coefficient(dmp, s6, 2) == dot(diff, grad_2) / dot(δ1, diff) dU = LiuStoreyCoefficient() - s7 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm) + s7 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + ) s7.X = grad_1 s7.δ = δ1 @test s7.coefficient(dmp, s7, 1) == 0.0 @@ -89,7 +147,15 @@ include("../utils/example_tasks.jl") @test s7.coefficient(dmp, s7, 2) == -dot(grad_2, diff) / dot(δ1, grad_1) dU = PolakRibiereCoefficient() - s8 = ConjugateGradientDescentState(M, x0, sC, s, dU, retr, vtm) + s8 = ConjugateGradientDescentState( + M, + x0; + stopping_criterion=sC, + stepsize=s, + coefficient=dU, + retraction_method=retr, + vector_transport_method=vtm, + ) s8.X = grad_1 s8.δ = δ1 @test s8.coefficient(dmp, s8, 1) == 0.0 diff --git a/test/solvers/test_convex_bundle_method.jl b/test/solvers/test_convex_bundle_method.jl new file mode 100644 index 0000000000..baa7555166 --- /dev/null +++ b/test/solvers/test_convex_bundle_method.jl @@ -0,0 +1,180 @@ +using Manopt, Manifolds, Test, QuadraticModels, RipQP, ManifoldDiff +using Manopt: convex_bundle_method_subsolver, convex_bundle_method_subsolver! +using Manopt: sectional_curvature, ζ_1, ζ_2, close_point + +@testset "The Convex Bundle Method" begin + M = Hyperbolic(4) + p = [0.0, 0.0, 0.0, 0.0, 1.0] + p0 = [0.0, 0.0, 0.0, 0.0, -1.0] + diameter = floatmax() + Ω = 0.0 + cbms = ConvexBundleMethodState( + M, + p0; + atol_λ=1e0, + diameter=diameter, + domain=(M, q) -> distance(M, q, p0) < diameter / 2 ? true : false, + k_max=Ω, + stopping_criterion=StopAfterIteration(200), + ) + @test get_iterate(cbms) == p0 + + cbms.X = [1.0, 0.0, 0.0, 0.0, 0.0] + @testset "Special Stopping Criteria" begin + sc1 = StopWhenLagrangeMultiplierLess(1e-8) + @test startswith( + repr(sc1), "StopWhenLagrangeMultiplierLess([1.0e-8]; mode=:estimate)\n" + ) + sc2 = StopWhenLagrangeMultiplierLess([1e-8, 1e-8]; mode=:both) + @test startswith( + repr(sc2), "StopWhenLagrangeMultiplierLess([1.0e-8, 1.0e-8]; mode=:both)\n" + ) + end + @testset "Allocating Subgradient" begin + f(M, q) = distance(M, q, p) + function ∂f(M, q) + if distance(M, p, q) == 0 + return zero_vector(M, q) + end + return -log(M, q, p) / max(10 * eps(Float64), distance(M, p, q)) + end + mp = DefaultManoptProblem(M, ManifoldSubgradientObjective(f, ∂f)) + X = zero_vector(M, p) + Y = get_subgradient(mp, p) + get_subgradient!(mp, X, p) + @test isapprox(M, p, X, Y) + oR = solve!(mp, cbms) + xHat = get_solver_result(oR) + # Check Fallbacks of Problem + @test get_cost(mp, p) == 0.0 + @test norm(M, p, get_subgradient(mp, p)) == 0 + @test_throws MethodError get_gradient(mp, cbms.p) + @test_throws MethodError get_proximal_map(mp, 1.0, cbms.p, 1) + bms2 = convex_bundle_method( + M, + f, + ∂f, + p0; + diameter=diameter, + domain=(M, q) -> distance(M, q, p0) < diameter / 2 ? 
true : false, + k_max=Ω, + stopping_criterion=StopAfterIteration(200), + return_state=true, + debug=[], + ) + p_star2 = get_solver_result(bms2) + @test get_subgradient(bms2) == -∂f(M, p_star2) + @test f(M, p_star2) <= f(M, p0) + set_iterate!(bms2, M, p) + @test get_iterate(bms2) == p + io = IOBuffer() + ds = DebugStepsize(; io=io) + # reset stepsize + bms2.stepsize(mp, bms2, 0) + bms2.stepsize(mp, bms2, 1) + ds(mp, bms2, 1) + s = String(take!(io)) + @test s == "s:1.0" + # Test warnings + dw1 = DebugWarnIfLagrangeMultiplierIncreases(:Once; tol=0.0) + @test repr(dw1) == "DebugWarnIfLagrangeMultiplierIncreases(; tol=\"0.0\")" + cbms.ξ = 101.0 + @test_logs (:warn,) dw1(mp, cbms, 1) + dw2 = DebugWarnIfLagrangeMultiplierIncreases(:Once; tol=1e1) + dw2.old_value = -101.0 + @test repr(dw2) == "DebugWarnIfLagrangeMultiplierIncreases(; tol=\"10.0\")" + cbms.ξ = -1.0 + @test_logs (:warn,) (:warn,) dw2(mp, cbms, 1) + end + @testset "Mutating Subgradient" begin + f(M, q) = distance(M, q, p) + function ∂f!(M, X, q) + d = distance(M, p, q) + if d == 0 + zero_vector!(M, X, q) + return X + end + log!(M, X, q, p) + X .*= -1 / max(10 * eps(Float64), d) + return X + end + bmom = ManifoldSubgradientObjective(f, ∂f!; evaluation=InplaceEvaluation()) + mp = DefaultManoptProblem(M, bmom) + X = zero_vector(M, p) + Y = get_subgradient(mp, p) + get_subgradient!(mp, X, p) + @test isapprox(M, p, X, Y) + sr = solve!(mp, cbms) + xHat = get_solver_result(sr) + # Check Fallbacks of Problem + @test get_cost(mp, p) == 0.0 + @test norm(M, p, get_subgradient(mp, p)) == 0 + @test_throws MethodError get_gradient(mp, cbms.p) + @test_throws MethodError get_proximal_map(mp, 1.0, cbms.p, 1) + s2 = convex_bundle_method( + M, + f, + ∂f!, + copy(p0); + diameter=diameter, + domain=(M, q) -> distance(M, q, p0) < diameter / 2 ? true : false, + k_max=Ω, + stopping_criterion=StopAfterIteration(200), + evaluation=InplaceEvaluation(), + sub_problem=convex_bundle_method_subsolver!, + return_state=true, + debug=[], + ) + p_star2 = get_solver_result(s2) + @test f(M, p_star2) <= f(M, p0) + end + @testset "Utility Functions for the Convex Bundle Method" begin + M = Sphere(2) + p = [1.0, 0.0, 0.0] + κ = 1.0 + R = π / 2 + @test sectional_curvature(M, p) ≈ κ + @test ζ_1(κ, R) ≈ 1.0 + @test -10eps() ≤ ζ_2(κ, R) ≤ 10eps() + @test distance(M, p, close_point(M, p, R)) ≤ R + cbms3 = ConvexBundleMethodState( + M, + p; + diameter=R, + domain=(M, q) -> distance(M, q, p) < R / 2 ? 
true : false,
+            stopping_criterion=StopAfterIteration(10),
+        )
+        @test -10eps() ≤ cbms3.ϱ ≤ 10eps()
+    end
+    @testset "A simple median run" begin
+        M = Sphere(2)
+        p1 = [1.0, 0.0, 0.0]
+        p2 = 1 / sqrt(2) .* [1.0, 1.0, 0.0]
+        p3 = 1 / sqrt(2) .* [1.0, 0.0, 1.0]
+        data = [p1, p2, p3]
+        f(M, p) = sum(1 / length(data) * distance.(Ref(M), Ref(p), data))
+        function ∂f(M, p)
+            return sum(
+                1 / length(data) *
+                ManifoldDiff.subgrad_distance.(Ref(M), data, Ref(p), 1; atol=1e-8),
+            )
+        end
+        p0 = p1
+        cbm_s = convex_bundle_method(M, f, ∂f, p0; return_state=true)
+        @test startswith(
+            repr(cbm_s), "# Solver state for `Manopt.jl`s Convex Bundle Method\n"
+        )
+        q = get_solver_result(cbm_s)
+        m = median(M, data)
+        @test distance(M, q, m) < 1e-2 # with default parameters this is not very precise
+        # test the other stopping criterion mode
+        q2 = convex_bundle_method(
+            M,
+            f,
+            ∂f,
+            p0;
+            stopping_criterion=StopWhenLagrangeMultiplierLess([1e-6, 1e-6]; mode=:both),
+        )
+        @test distance(M, q2, m) < 1e-2
+    end
+end
diff --git a/test/solvers/test_proximal_bundle_method.jl b/test/solvers/test_proximal_bundle_method.jl
new file mode 100644
index 0000000000..e9352ed5ac
--- /dev/null
+++ b/test/solvers/test_proximal_bundle_method.jl
@@ -0,0 +1,161 @@
+using Manopt, Manifolds, Test, QuadraticModels, RipQP, ManifoldDiff
+import Manopt: proximal_bundle_method_subsolver, proximal_bundle_method_subsolver!
+
+@testset "The Proximal Bundle Method" begin
+    M = Hyperbolic(4)
+    p = [0.0, 0.0, 0.0, 0.0, 1.0]
+    p0 = [0.0, 0.0, 0.0, 0.0, -1.0]
+    pbms = ProximalBundleMethodState(M, p0; stopping_criterion=StopAfterIteration(200))
+    @test get_iterate(pbms) == p0
+
+    pbms.X = [1.0, 0.0, 0.0, 0.0, 0.0]
+    @testset "Special Stopping Criteria" begin
+        sc1 = StopWhenLagrangeMultiplierLess(1e-8)
+        @test startswith(
+            repr(sc1), "StopWhenLagrangeMultiplierLess([1.0e-8]; mode=:estimate)\n"
+        )
+        sc2 = StopWhenLagrangeMultiplierLess([1e-8, 1e-8]; mode=:both)
+        @test startswith(
+            repr(sc2), "StopWhenLagrangeMultiplierLess([1.0e-8, 1.0e-8]; mode=:both)\n"
+        )
+    end
+    @testset "Allocating Subgradient" begin
+        f(M, q) = distance(M, q, p)
+        function ∂f(M, q)
+            if distance(M, p, q) == 0
+                return zero_vector(M, q)
+            end
+            return -log(M, q, p) / max(10 * eps(Float64), distance(M, p, q))
+        end
+        mp = DefaultManoptProblem(M, ManifoldSubgradientObjective(f, ∂f))
+        X = zero_vector(M, p)
+        Y = get_subgradient(mp, p)
+        get_subgradient!(mp, X, p)
+        @test isapprox(M, p, X, Y)
+        oR = solve!(mp, pbms)
+        xHat = get_solver_result(oR)
+        # Check Fallbacks of Problem
+        @test get_cost(mp, p) == 0.0
+        @test norm(M, p, get_subgradient(mp, p)) == 0
+        @test_throws MethodError get_gradient(mp, pbms.p)
+        @test_throws MethodError get_proximal_map(mp, 1.0, pbms.p, 1)
+        pbms2 = proximal_bundle_method(
+            M,
+            f,
+            ∂f,
+            p0;
+            stopping_criterion=StopAfterIteration(200),
+            return_state=true,
+            debug=[],
+        )
+        p_star2 = get_solver_result(pbms2)
+        @test get_subgradient(pbms2) == -∂f(M, p_star2)
+        @test f(M, p_star2) <= f(M, p0)
+        set_iterate!(pbms2, M, p)
+        @test get_iterate(pbms2) == p
+        # Test warnings
+        dw1 = DebugWarnIfLagrangeMultiplierIncreases(:Once; tol=0.0)
+        dw1(mp, pbms, 1) # do one normal run.
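+        # Setting ν to 101.0 below makes the tracked value -ν negative, which triggers
+        # the "stopping parameter is negative" warning branch exactly once.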
+        @test repr(dw1) == "DebugWarnIfLagrangeMultiplierIncreases(; tol=\"0.0\")"
+        pbms.ν = 101.0
+        @test_logs (:warn,) dw1(mp, pbms, 2)
+        dw2 = DebugWarnIfLagrangeMultiplierIncreases(:Once; tol=1e1)
+        dw2.old_value = -101.0
+        @test repr(dw2) == "DebugWarnIfLagrangeMultiplierIncreases(; tol=\"10.0\")"
+        pbms.ν = -1.0
+        @test_logs (:warn,) (:warn,) dw2(mp, pbms, 1)
+    end
+    @testset "Mutating Subgradient" begin
+        f(M, q) = distance(M, q, p)
+        function ∂f!(M, X, q)
+            d = distance(M, p, q)
+            if d == 0
+                zero_vector!(M, X, q)
+                return X
+            end
+            log!(M, X, q, p)
+            X .*= -1 / max(10 * eps(Float64), d)
+            return X
+        end
+        bmom = ManifoldSubgradientObjective(f, ∂f!; evaluation=InplaceEvaluation())
+        mp = DefaultManoptProblem(M, bmom)
+        X = zero_vector(M, p)
+        Y = get_subgradient(mp, p)
+        get_subgradient!(mp, X, p)
+        @test isapprox(M, p, X, Y)
+        sr = solve!(mp, pbms)
+        xHat = get_solver_result(sr)
+        # Check Fallbacks of Problem
+        @test get_cost(mp, p) == 0.0
+        @test norm(M, p, get_subgradient(mp, p)) == 0
+        @test_throws MethodError get_gradient(mp, pbms.p)
+        @test_throws MethodError get_proximal_map(mp, 1.0, pbms.p, 1)
+        s2 = proximal_bundle_method(
+            M,
+            f,
+            ∂f!,
+            copy(p0);
+            stopping_criterion=StopAfterIteration(200),
+            evaluation=InplaceEvaluation(),
+            sub_state=AllocatingEvaluation(), # keep the default allocating subsolver here
+            return_state=true,
+            debug=[],
+        )
+        p_star2 = get_solver_result(s2)
+        @test f(M, p_star2) <= f(M, p0)
+    end
+    @testset "A simple median run" begin
+        M = Sphere(2)
+        p1 = [1.0, 0.0, 0.0]
+        p2 = 1 / sqrt(2) .* [1.0, 1.0, 0.0]
+        p3 = 1 / sqrt(2) .* [1.0, 0.0, 1.0]
+        data = [p1, p2, p3]
+        f(M, p) = sum(1 / length(data) * distance.(Ref(M), Ref(p), data))
+        function ∂f(M, p)
+            return sum(
+                1 / length(data) *
+                ManifoldDiff.subgrad_distance.(Ref(M), data, Ref(p), 1; atol=1e-8),
+            )
+        end
+        p0 = p1
+        pbm_s = proximal_bundle_method(M, f, ∂f, p0; return_state=true)
+        @test startswith(
+            repr(pbm_s), "# Solver state for `Manopt.jl`s Proximal Bundle Method\n"
+        )
+        q = get_solver_result(pbm_s)
+        # with default parameters for both median and proximal bundle, this is not very precise
+        m = median(M, data)
+        @test distance(M, q, m) < 2 * 1e-3
+        # test accessors
+        @test get_iterate(pbm_s) == q
+        @test norm(M, q, get_subgradient(pbm_s)) < 1e-4
+        # test the other stopping criterion mode
+        q2 = proximal_bundle_method(
+            M,
+            f,
+            ∂f,
+            p0;
+            stopping_criterion=StopWhenLagrangeMultiplierLess([1e-8, 1e-8]; mode=:both),
+        )
+        @test distance(M, q2, m) < 2 * 1e-3
+        # Test bundle_size and inplace
+        p_size = copy(p0)
+        function ∂f!(M, X, p)
+            X = sum(
+                1 / length(data) *
+                ManifoldDiff.subgrad_distance!.(Ref(M), Ref(X), data, Ref(p), 1; atol=1e-8),
+            )
+            return X
+        end
+        proximal_bundle_method!(
+            M,
+            f,
+            ∂f!,
+            p_size;
+            bundle_size=2,
+            evaluation=InplaceEvaluation(),
+            stopping_criterion=StopAfterIteration(200),
+            sub_problem=proximal_bundle_method_subsolver!,
+        )
+    end
+end
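+# Note on custom subsolvers: the in-place dispatch in `proximal_bundle_method.jl` calls
+# `sub_problem(M, λ, p_last_serious, μ, approx_errors, transported_subgradients)` and
+# expects the convex coefficients to be written into `λ`. A minimal sketch of a drop-in
+# replacement (the name `my_subsolver!` is hypothetical):
+#
+#     function my_subsolver!(M, λ, p_last_serious, μ, approx_errors, transported_subgradients)
+#         # solve the quadratic subproblem here and write the result into λ
+#         return λ
+#     end
+#
+# It is passed via `sub_problem=my_subsolver!` together with `evaluation=InplaceEvaluation()`,
+# as in the call above.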