diff --git a/Changelog.md b/Changelog.md index 46522160ab..f28ebc36cc 100644 --- a/Changelog.md +++ b/Changelog.md @@ -5,6 +5,14 @@ All notable Changes to the Julia package `Manopt.jl` will be documented in this The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.48] + +### Fixed + +* fixes an imprecision in the interface of `get_iterate` that sometimes led to the swarm of `particle_swarm` being returned as the iterate. +* refactor `particle_swarm` in naming and access functions to avoid this also in the future. + To access the whole swarm, one now should use `get_manopt_parameter(pss, :Population)` + ## [0.4.47] January 6, 2024 ### Fixed diff --git a/Project.toml b/Project.toml index 9b782bbdb5..9b7bae686f 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Manopt" uuid = "0fc0a36d-df90-57f3-8f93-d78a9fc72bb5" authors = ["Ronny Bergmann "] -version = "0.4.47" +version = "0.4.48" [deps] ColorSchemes = "35d6a980-a343-548e-a6ea-1d62b119f2f4" diff --git a/docs/src/plans/index.md b/docs/src/plans/index.md index 857f324b94..365c8e0861 100644 --- a/docs/src/plans/index.md +++ b/docs/src/plans/index.md @@ -29,18 +29,22 @@ The column “generic” refers to a short hand that might be used for readabil | Symbol | Used in | Description | generic | | :----------- | :------: | ;-------------------------------------------------------- | :------ | | `:active` | [`DebugWhenActive`](@ref) | activity of the debug action stored within | | -| `:Basepoint` | [`TangentSpace`]() | the point the tangent space is at | `:p` | +| `:Basepoint` | [`TangentSpace`]() | the point the tangent space is at | often `:p` | | `:Cost` | generic |the cost function (within an objective, as pass down) | | | `:Debug` | [`DebugSolverState`](@ref) | the stored `debugDictionary` | | -| `:Gradient` | generic |the gradient function (within an objective, as pass down) | | +| `:Gradient` | generic | the gradient function (within an objective, as pass down) | | | `:Iterate` | generic | the (current) iterate, similar to [`set_iterate!`](@ref), within a state | | | `:Manifold` | generic |the manifold (within a problem, as pass down) | | | `:Objective` | generic | the objective (within a problem, as pass down) | | | `:SubProblem` | generic | the sub problem (within a state, as pass down) | | | `:SubState` | generic | the sub state (within a state, as pass down) | | | `:λ` | [`ProximalDCCost`](@ref), [`ProximalDCGrad`](@ref) | set the proximal parameter within the proximal sub objective elements | | -| `:p` | generic | a certain point | | -| `:X` | generic | a certain tangent vector | | +| `:Population` | [`ParticleSwarmState`](@ref) | a certain population of points, e.g. [`particle_swarm`](@ref)s swarm | | | `:TrustRegionRadius` | [`TrustRegionsState`](@ref) | the trust region radius | `:σ` | | `:ρ`, `:u` | [`ExactPenaltyCost`](@ref), [`ExactPenaltyGrad`](@ref) | Parameters within the exact penalty objective | | | `:ρ`, `:μ`, `:λ` | [`AugmentedLagrangianCost`](@ref) and [`AugmentedLagrangianGrad`](@ref) | Parameters of the Lagrangian function | | + +Since the iterate is often stored in the states fields `s.p` one _could_ access the iterate +often also with `:p` and similarly the gradient with `:X`. +This is discouraged for both readability as well as to star more generic, and it is recommended +to use `:Iterate` and `:Gradient` instead in generic settings. \ No newline at end of file diff --git a/src/Manopt.jl b/src/Manopt.jl index 18b0a67b5c..5898fec2e6 100644 --- a/src/Manopt.jl +++ b/src/Manopt.jl @@ -320,7 +320,6 @@ export get_state, forward_operator, forward_operator!, get_objective -export set_manopt_parameter! export get_hessian, get_hessian! export ApproxHessianFiniteDifference export is_state_decorator, dispatch_state_decorator diff --git a/src/plans/plan.jl b/src/plans/plan.jl index aaaf62b3b0..c1fa227e53 100644 --- a/src/plans/plan.jl +++ b/src/plans/plan.jl @@ -14,8 +14,6 @@ status_summary(e) = "$(e)" For any `f` and a `Symbol` `e` we dispatch on its value so by default, to set some `args...` in `f` or one of uts sub elements. - - """ function set_manopt_parameter!(f, e::Symbol, args...) return set_manopt_parameter!(f, Val(e), args...) @@ -24,6 +22,19 @@ function set_manopt_parameter!(f, args...) return f end +""" + get_manopt_parameter(f, element::Symbol, args...) + +For any `f` and a `Symbol` `e` we dispatch on its value so by default, to +get some element from `f` potentially further qulalified by `args...`. + +This functions returns `nothing` if `f` does not have the property `element` +""" +function get_manopt_parameter(f, e::Symbol, args...) + return get_manopt_parameter(f, Val(e), args...) +end +get_manopt_parameter(f, args...) = nothing + include("objective.jl") include("problem.jl") include("solver_state.jl") diff --git a/src/plans/solver_state.jl b/src/plans/solver_state.jl index b807c1722f..735c319e82 100644 --- a/src/plans/solver_state.jl +++ b/src/plans/solver_state.jl @@ -190,6 +190,8 @@ end get_iterate(O::AbstractManoptSolverState) return the (last stored) iterate within [`AbstractManoptSolverState`](@ref)` `s`. +This should usually refer to a single point on the manifold the solver is working on + By default also undecorates the state beforehand. """ get_iterate(s::AbstractManoptSolverState) = _get_iterate(s, dispatch_state_decorator(s)) @@ -202,16 +204,6 @@ function _get_iterate(s::AbstractManoptSolverState, ::Val{false}) end _get_iterate(s::AbstractManoptSolverState, ::Val{true}) = get_iterate(s.state) -""" - get_manopt_parameter(ams::AbstractManoptSolverState, element::Symbol, args...) - -Obtain a certain field or semantic element from the [`AbstractManoptSolverState`](@ref) `ams`. -This function passes to `Val(element)` and specific setters should dispatch on `Val{element}`. -""" -function get_manopt_parameter(ams::AbstractManoptSolverState, e::Symbol, args...) - return get_manopt_parameter(ams, Val(e), args...) -end - _set_iterate!(s::AbstractManoptSolverState, M, p, ::Val{true}) = set_iterate!(s.state, M, p) """ @@ -570,6 +562,11 @@ function update_storage!( a.values[key] = deepcopy(get_iterate(s)) elseif key === :Gradient a.values[key] = deepcopy(get_gradient(s)) + elseif key === :Population + swarm = get_manopt_parameter(s, key) + if !isnothing(swarm) + a.values[key] = deepcopy.(swarm) + end else a.values[key] = deepcopy(getproperty(s, key)) end diff --git a/src/plans/stopping_criterion.jl b/src/plans/stopping_criterion.jl index 1c29e54b36..a254a3f71d 100644 --- a/src/plans/stopping_criterion.jl +++ b/src/plans/stopping_criterion.jl @@ -206,7 +206,7 @@ end function StopWhenChangeLess( M::AbstractManifold, ε::Float64; - storage::StoreStateAction=StoreStateAction(M; store_points=Tuple{:Iterate}), + storage::StoreStateAction=StoreStateAction(M; store_points=Tuple{:Iterate,:Population}), inverse_retraction_method::IRT=default_inverse_retraction_method(M), ) where {IRT<:AbstractInverseRetractionMethod} return StopWhenChangeLess{IRT,typeof(storage)}( @@ -215,12 +215,12 @@ function StopWhenChangeLess( end function StopWhenChangeLess( ε::Float64; - storage::StoreStateAction=StoreStateAction([:Iterate]), + storage::StoreStateAction=StoreStateAction([:Iterate, :Population]), manifold::AbstractManifold=DefaultManifold(), inverse_retraction_method::IRT=default_inverse_retraction_method(manifold), ) where {IRT<:AbstractInverseRetractionMethod} if !(manifold isa DefaultManifold) - @warn "The `manifold` keyword is deprecated, use the first positional argument `M`. This keyword for now sets `inverse_retracion_method`." + @warn "The `manifold` keyword is deprecated, use the first positional argument `M` instead." end return StopWhenChangeLess{IRT,typeof(storage)}( ε, "", storage, inverse_retraction_method, 0 diff --git a/src/solvers/particle_swarm.jl b/src/solvers/particle_swarm.jl index 0a101681ad..41a0de6d77 100644 --- a/src/solvers/particle_swarm.jl +++ b/src/solvers/particle_swarm.jl @@ -8,25 +8,30 @@ Describes a particle swarm optimizing algorithm, with # Fields -* `x` – a set of points (of type `AbstractVector{P}`) on a manifold as initial particle positions -* `velocity` – a set of tangent vectors (of type `AbstractVector{T}`) representing the velocities of the particles -* `inertia` – (`0.65`) the inertia of the particles -* `social_weight` – (`1.4`) a social weight factor -* `cognitive_weight` – (`1.4`) a cognitive weight factor -* `p_temp` – temporary storage for a point to avoid allocations during a step of the algorithm -* `social_vec` - temporary storage for a tangent vector related to `social_weight` -* `cognitive_vector` - temporary storage for a tangent vector related to `cognitive_weight` -* `stopping_criterion` – (`[`StopAfterIteration`](@ref)`(500) | `[`StopWhenChangeLess`](@ref)`(1e-4)`) +* `cognitive_weight` – (`1.4`) a cognitive weight factor +* `inertia` – (`0.65`) the inertia of the particles +* `inverse_retraction_method` - (`default_inverse_retraction_method(M, eltype(swarm))`) an inverse retraction to use. +* `retraction_method` – (`default_retraction_method(M, eltype(swarm))`) the retraction to use +* `social_weight` – (`1.4`) a social weight factor +* `stopping_criterion` – (`[`StopAfterIteration`](@ref)`(500) | `[`StopWhenChangeLess`](@ref)`(1e-4)`) a functor inheriting from [`StoppingCriterion`](@ref) indicating when to stop. -* `retraction_method` – (`default_retraction_method(M, eltype(x))`) the retraction to use -* `inverse_retraction_method` - (`default_inverse_retraction_method(M, eltype(x))`) an inverse retraction to use. -* `vector_transport_method` - (`default_vector_transport_method(M, eltype(x))`) a vector transport to use +* `vector_transport_method` - (`default_vector_transport_method(M, eltype(swarm))`) a vector transport to use +* `velocity` – a set of tangent vectors (of type `AbstractVector{T}`) representing the velocities of the particles + +# Internal and temporary fields + +* `cognitive_vector` - temporary storage for a tangent vector related to `cognitive_weight` +* `positional_best` – storing the best position ``p_i`` every single swarm participant visited +* `q` – temporary storage for a point to avoid allocations during a step of the algorithm +* `social_vec` - temporary storage for a tangent vector related to `social_weight` +* `swarm` – a set of points (of type `AbstractVector{P}`) on a manifold ``\{s_i\}_{i=1}^N`` +* `p` - storage for the best point ``p`` visited by all particles. # Constructor - ParticleSwarmState(M, x0, velocity; kawrgs...) + ParticleSwarmState(M, initial_swarm, velocity; kawrgs...) -construct a particle swarm Option for the manifold `M` starting at initial population `x0` with velocities `x0`, +construct a particle swarm solver state for the manifold `M` starting at initial population `x0` with velocities `x0`, where the manifold is used within the defaults of the other fields mentioned above, which are keyword arguments here. @@ -45,14 +50,14 @@ mutable struct ParticleSwarmState{ TInvRetraction<:AbstractInverseRetractionMethod, TVTM<:AbstractVectorTransportMethod, } <: AbstractManoptSolverState - x::TX - p::TX - g::P + swarm::TX + positional_best::TX + p::P velocity::TVelocity inertia::TParams social_weight::TParams cognitive_weight::TParams - p_temp::P + q::P social_vector::T cognitive_vector::T stop::TStopping @@ -62,15 +67,15 @@ mutable struct ParticleSwarmState{ function ParticleSwarmState( M::AbstractManifold, - x::VP, + swarm::VP, velocity::VT; inertia=0.65, social_weight=1.4, cognitive_weight=1.4, stopping_criterion::SCT=StopAfterIteration(500) | StopWhenChangeLess(1e-4), - retraction_method::RTM=default_retraction_method(M, eltype(x)), - inverse_retraction_method::IRM=default_inverse_retraction_method(M, eltype(x)), - vector_transport_method::VTM=default_vector_transport_method(M, eltype(x)), + retraction_method::RTM=default_retraction_method(M, eltype(swarm)), + inverse_retraction_method::IRM=default_inverse_retraction_method(M, eltype(swarm)), + vector_transport_method::VTM=default_vector_transport_method(M, eltype(swarm)), ) where { P, T, @@ -84,11 +89,12 @@ mutable struct ParticleSwarmState{ s = new{ P,T,VP,VT,typeof(inertia + social_weight + cognitive_weight),SCT,RTM,IRM,VTM }() - s.x = x - s.p = copy.(Ref(M), x) - s.p_temp = copy(M, first(x)) - s.social_vector = zero_vector(M, s.p_temp) - s.cognitive_vector = zero_vector(M, s.p_temp) + s.swarm = swarm + s.positional_best = copy.(Ref(M), swarm) + s.q = copy(M, first(swarm)) + s.p = copy(M, first(swarm)) + s.social_vector = zero_vector(M, s.q) + s.cognitive_vector = zero_vector(M, s.q) s.velocity = velocity s.inertia = inertia s.social_weight = social_weight @@ -123,10 +129,16 @@ end # # Accessors # -get_iterate(O::ParticleSwarmState) = O.x -function set_iterate!(O::ParticleSwarmState, p) - O.x = p - return O +get_iterate(pss::ParticleSwarmState) = pss.p +function set_iterate!(pss::ParticleSwarmState, p) + pss.p = p + return pss +end +function set_manopt_parameter!(pss::ParticleSwarmState, ::Val{:Population}, swarm) + return pss.swarm = swarm +end +function get_manopt_parameter(pss::ParticleSwarmState, ::Val{:Population}) + return pss.swarm end # # Constructors @@ -141,18 +153,21 @@ perform the particle swarm optimization algorithm (PSO), starting with an initia If no `swarm` is provided, `swarm_size` many random points are used. Note that since this method does not work in-place – these points are duplicated internally. -The aim of PSO is to find the particle position ``g`` on the `Manifold M` that solves +The aim of PSO is to find the particle position ``p`` on the `Manifold M` that solves approximately + ```math -\min_{x ∈\mathcal{M}} F(x). +\min_{p ∈\mathcal{M}} F(p). ``` -To this end, a swarm of particles is moved around the `Manifold M` in the following manner. -For every particle ``k`` we compute the new particle velocities ``v_k^{(i)}`` in every step ``i`` of the algorithm by + +To this end, a swarm ``S = \{s_1,\ldots_s_n\}`` of particles is moved around the manifold `M` in the following manner. +For every particle ``s_k^{(i)}`` we compute the new particle velocities ``X_k^{(i)}`` in every step ``i`` of the algorithm by ```math -v_k^{(i)} = ω \, \operatorname{T}_{x_k^{(i)}\gets x_k^{(i-1)}}v_k^{(i-1)} + c \, r_1 \operatorname{retr}_{x_k^{(i)}}^{-1}(p_k^{(i)}) + s \, r_2 \operatorname{retr}_{x_k^{(i)}}^{-1}(g), +begin{aligned*} + X_k^{(i)} &= ω \, \operatorname{T}_{s_k^{(i)}\gets s_k^{(i-1)}}X_k^{(i-1)} + c r_1 \operatorname{retr}_{s_k^{(i)}}^{-1}(p_k^{(i)}) + s r_2 \operatorname{retr}_{s_k^{(i)}}^{-1}(p), ``` -where ``x_k^{(i)}`` is the current particle position, ``ω`` denotes the inertia, +where ``s_k^{(i)}`` is the current particle position, ``ω`` denotes the inertia, ``c`` and ``s`` are a cognitive and a social weight, respectively, ``r_j``, ``j=1,2`` are random factors which are computed new for each particle and step, ``\operatorname{retr}^{-1}`` denotes an inverse retraction on the `Manifold` `M`, and @@ -161,26 +176,27 @@ where ``x_k^{(i)}`` is the current particle position, ``ω`` denotes the inertia Then the position of the particle is updated as ```math -x_k^{(i+1)} = \operatorname{retr}_{x_k^{(i)}}(v_k^{(i)}), +s_k^{(i+1)} = \operatorname{retr}_{s_k^{(i)}}(X_k^{(i)}), ``` -where ``\operatorname{retr}`` denotes a retraction on the `Manifold` `M`. At the end of each step for every particle, we set +where ``\operatorname{retr}`` denotes a retraction on the `Manifold` `M`. +Then we update the single every particles best entries ``p_k^{(i)}`` as ```math p_k^{(i+1)} = \begin{cases} -x_k^{(i+1)}, & \text{if } F(x_k^{(i+1)}) 0 c.reason = "The algorithm performed a step with a change ($d in the population) less than $(c.threshold).\n" diff --git a/test/solvers/test_particle_swarm.jl b/test/solvers/test_particle_swarm.jl index 94158f89d0..04d792f393 100644 --- a/test/solvers/test_particle_swarm.jl +++ b/test/solvers/test_particle_swarm.jl @@ -31,7 +31,7 @@ using Random j = argmin([f(M, y) for y in p1]) g0 = deepcopy(p1[j]) @test f(M, g) <= f(M, g0) # global did not get worse - for (p, q) in zip(o.p, p1) + for (p, q) in zip(o.positional_best, p1) @test f(M, p) <= f(M, q) # nonincreased # the cost of g is not greater than the cost of any p[i] @test f(M, g) <= f(M, p) @@ -46,16 +46,18 @@ using Random p = DefaultManoptProblem(M, ManifoldCostObjective(f)) o = ParticleSwarmState(M, zero.(p_start), X_start) # test set_iterate - set_iterate!(o, p_start) - @test sum(norm.(get_iterate(o) .- p_start)) == 0 + Manopt.set_manopt_parameter!(o, :Population, p_start) + @test sum(norm.(Manopt.get_manopt_parameter(o, :Population) .- p_start)) == 0 initialize_solver!(p, o) step_solver!(p, o, 1) - for (p, v) in zip(o.x, o.velocity) + for (p, v) in zip(o.swarm, o.velocity) # check that the new particle locations are on the manifold @test is_point(M, p, true) # check that the new velocities are tangent vectors of the original particle locations @test is_vector(M, p, v, true; atol=2e-15) end + set_iterate!(o, p_start[1]) + @test get_iterate(o) == p_start[1] end @testset "Spherical Particle Swarm" begin Random.seed!(42) diff --git a/test/utils/dummy_types.jl b/test/utils/dummy_types.jl index 6ee0ec7e55..9d5792a8a4 100644 --- a/test/utils/dummy_types.jl +++ b/test/utils/dummy_types.jl @@ -1,19 +1,23 @@ -using Manopt +using Manopt, ManifoldsBase import Manopt: get_iterate, set_manopt_parameter! -struct DummyDecoratedObjective{E,O<:AbstractManifoldObjective} <: - Manopt.AbstractDecoratedManifoldObjective{E,O} - objective::O -end -function DummyDecoratedObjective( - o::O -) where {E<:AbstractEvaluationType,O<:AbstractManifoldObjective{E}} - return DummyDecoratedObjective{E,O}(o) -end +s = @isdefined _dummy_types_includeed +if !s + _dummy_types_includeed = true + struct DummyDecoratedObjective{E,O<:AbstractManifoldObjective} <: + Manopt.AbstractDecoratedManifoldObjective{E,O} + objective::O + end + function DummyDecoratedObjective( + o::O + ) where {E<:AbstractEvaluationType,O<:AbstractManifoldObjective{E}} + return DummyDecoratedObjective{E,O}(o) + end -struct DummyStateProblem{M<:AbstractManifold} <: AbstractManoptProblem{M} end -mutable struct DummyState <: AbstractManoptSolverState - storage::Vector{Float64} + struct DummyStateProblem{M<:AbstractManifold} <: AbstractManoptProblem{M} end + mutable struct DummyState <: AbstractManoptSolverState + storage::Vector{Float64} + end + DummyState() = DummyState([]) + get_iterate(::DummyState) = NaN + set_manopt_parameter!(s::DummyState, ::Val, v) = s end -DummyState() = DummyState([]) -get_iterate(::DummyState) = NaN -set_manopt_parameter!(s::DummyState, ::Val, v) = s diff --git a/test/utils/example_tasks.jl b/test/utils/example_tasks.jl index 4e93a70dc0..7da2d87b33 100644 --- a/test/utils/example_tasks.jl +++ b/test/utils/example_tasks.jl @@ -1,13 +1,18 @@ -""" - M, f, grad_f, p0, p_star = Circle_mean_task() +s = @isdefined _example_tasks_included +if !s + _example_tasks_included = true -Create a small mean problem on the circle to test Number-based algorithms -""" -function Circle_mean_task() - M = Circle() - data = [-π / 2, π / 4, 0.0, π / 4] - p_star = 0.0 - f(M, p) = 1 / 10 * sum(distance.(Ref(M), data, Ref(p)) .^ 2) - grad_f(M, p) = 1 / 5 * sum(-log.(Ref(M), Ref(p), data)) - return M, f, grad_f, data[1], p_star + """ + M, f, grad_f, p0, p_star = Circle_mean_task() + + Create a small mean problem on the circle to test Number-based algorithms + """ + function Circle_mean_task() + M = Circle() + data = [-π / 2, π / 4, 0.0, π / 4] + p_star = 0.0 + f(M, p) = 1 / 10 * sum(distance.(Ref(M), data, Ref(p)) .^ 2) + grad_f(M, p) = 1 / 5 * sum(-log.(Ref(M), Ref(p), data)) + return M, f, grad_f, data[1], p_star + end end