diff --git a/NEWS.md b/NEWS.md index 0167ac539b..631ae99942 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.9.x] + +### Fixed + +* a bug that cause `project` for tangent vectors to return wrong results on `MultinomialDoublyStochastic` + +### Added + +* the new manifold of `MultinomialSymmetricPositiveDefinite` matrices +* `rand!` for `MultinomialDoublyStochastic` and `MultinomialSymmetric` + ## [0.9.11] – 2023-12-27 ### Fixed diff --git a/docs/make.jl b/docs/make.jl index 0caf926f5a..23261a7abc 100755 --- a/docs/make.jl +++ b/docs/make.jl @@ -118,6 +118,7 @@ makedocs(; "Multinomial doubly stochastic matrices" => "manifolds/multinomialdoublystochastic.md", "Multinomial matrices" => "manifolds/multinomial.md", "Multinomial symmetric matrices" => "manifolds/multinomialsymmetric.md", + "Multinomial symmetric positive definite matrices" => "manifolds/multinomialsymmetricpositivedefinite.md", "Oblique manifold" => "manifolds/oblique.md", "Probability simplex" => "manifolds/probabilitysimplex.md", "Positive numbers" => "manifolds/positivenumbers.md", diff --git a/docs/src/manifolds/multinomialdoublystochastic.md b/docs/src/manifolds/multinomialdoublystochastic.md index 8cbaba852c..2b8cfcddc0 100644 --- a/docs/src/manifolds/multinomialdoublystochastic.md +++ b/docs/src/manifolds/multinomialdoublystochastic.md @@ -7,3 +7,8 @@ Order = [:type, :function] ``` ## Literature + +```@bibliography +Pages = ["multinomialdoublystochastic.md"] +Canonical=false +``` diff --git a/docs/src/manifolds/multinomialsymmetric.md b/docs/src/manifolds/multinomialsymmetric.md index d895045cdb..d8de6978ff 100644 --- a/docs/src/manifolds/multinomialsymmetric.md +++ b/docs/src/manifolds/multinomialsymmetric.md @@ -7,3 +7,8 @@ Order = [:type, :function] ``` ## Literature + +```@bibliography +Pages = ["multinomialsymmetric.md"] +Canonical=false +``` diff --git a/docs/src/manifolds/multinomialsymmetricpositivedefinite.md b/docs/src/manifolds/multinomialsymmetricpositivedefinite.md new file mode 100644 index 0000000000..a4059f05e5 --- /dev/null +++ b/docs/src/manifolds/multinomialsymmetricpositivedefinite.md @@ -0,0 +1,14 @@ +# Multinomial symmetric positive definite matrices + +```@autodocs +Modules = [Manifolds] +Pages = ["manifolds/MultinomialSymmetricPoitiveDefinite.jl"] +Order = [:type, :function] +``` + +## Literature + +```@bibliography +Pages = ["multinomialsymmetricpositivedefinite.md"] +Canonical=false +``` diff --git a/docs/src/references.bib b/docs/src/references.bib index 98a24f12f6..3f2cc96041 100644 --- a/docs/src/references.bib +++ b/docs/src/references.bib @@ -275,6 +275,7 @@ @article{DouikHassibi:2019 TITLE = {Manifold Optimization Over the Set of Doubly Stochastic Matrices: A Second-Order Geometry}, JOURNAL = {IEEE Transactions on Signal Processing} } + # # E # ---------------------------------------------------------------------------------------- diff --git a/src/Manifolds.jl b/src/Manifolds.jl index 89c466e728..b9755fae35 100644 --- a/src/Manifolds.jl +++ b/src/Manifolds.jl @@ -425,6 +425,7 @@ include("manifolds/GeneralizedStiefel.jl") include("manifolds/Hyperbolic.jl") include("manifolds/MultinomialDoublyStochastic.jl") include("manifolds/MultinomialSymmetric.jl") +include("manifolds/MultinomialSymmetricPoitiveDefinite.jl") include("manifolds/PositiveNumbers.jl") include("manifolds/ProjectiveSpace.jl") include("manifolds/SkewHermitian.jl") diff --git a/src/manifolds/Multinomial.jl b/src/manifolds/Multinomial.jl index e01f2b763d..220f1885e5 100644 --- a/src/manifolds/Multinomial.jl +++ b/src/manifolds/Multinomial.jl @@ -5,8 +5,11 @@ The multinomial manifold consists of `m` column vectors, where each column is of `n` and unit norm, i.e. ````math -\mathcal{MN}(n,m) \coloneqq \bigl\{ p ∈ ℝ^{n×m}\ \big|\ p_{i,j} > 0 \text{ for all } i=1,…,n, j=1,…,m \text{ and } p^{\mathrm{T}}\mathbb{1}_m = \mathbb{1}_n\bigr\}, +\mathcal{MN}(n,m) \coloneqq \bigl\{ + p ∈ ℝ^{n×m}\ \big|\ p_{i,j} > 0 \text{ for all } i=1,…,n, j=1,…,m + \text{ and } p^{\mathrm{T}}\mathbb{1}_m = \mathbb{1}_n\bigr\}, ```` + where $\mathbb{1}_k$ is the vector of length $k$ containing ones. This yields exactly the same metric as diff --git a/src/manifolds/MultinomialDoublyStochastic.jl b/src/manifolds/MultinomialDoublyStochastic.jl index 40a4ef3e03..6d4161362e 100644 --- a/src/manifolds/MultinomialDoublyStochastic.jl +++ b/src/manifolds/MultinomialDoublyStochastic.jl @@ -3,6 +3,7 @@ A common type for manifolds that are doubly stochastic, for example by direct constraint [`MultinomialDoubleStochastic`](@ref) or by symmetry [`MultinomialSymmetric`](@ref), +or additionally by positife definiteness [`MultinomialSymmetricPoitiveDefinite`](@ref) as long as they are also modeled as [`IsIsometricEmbeddedManifold`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/decorator.html#ManifoldsBase.IsIsometricEmbeddedManifold). """ abstract type AbstractMultinomialDoublyStochastic <: AbstractDecoratorManifold{ℝ} end @@ -14,8 +15,9 @@ end @doc raw""" MultinomialDoublyStochastic{T} <: AbstractMultinomialDoublyStochastic -The set of doubly stochastic multinomial matrices consists of all $n×n$ matrices with +The set of doubly stochastic multinomial matrices consists of all ``n×n`` matrices with stochastic columns and rows, i.e. + ````math \begin{aligned} \mathcal{DP}(n) \coloneqq \bigl\{p ∈ ℝ^{n×n}\ \big|\ &p_{i,j} > 0 \text{ for all } i=1,…,n, j=1,…,m,\\ @@ -24,7 +26,7 @@ stochastic columns and rows, i.e. \end{aligned} ```` -where $\mathbf{1}_n$ is the vector of length $n$ containing ones. +where ``\mathbf{1}_n`` is the vector of length ``n`` containing ones. The tangent space can be written as @@ -35,7 +37,7 @@ X\mathbf{1}_n = X^{\mathrm{T}}\mathbf{1}_n = \mathbf{0}_n \bigr\}, ```` -where $\mathbf{0}_n$ is the vector of length $n$ containing zeros. +where ``\mathbf{0}_n`` is the vector of length ``n`` containing zeros. More details can be found in Section III [DouikHassibi:2019](@cite). @@ -43,7 +45,7 @@ More details can be found in Section III [DouikHassibi:2019](@cite). MultinomialDoubleStochastic(n::Int; parameter::Symbol=:type) -Generate the manifold of matrices $\mathbb R^{n×n}$ that are doubly stochastic and symmetric. +Generate the manifold of matrices ``\mathbb R^{n×n}`` that are doubly stochastic and symmetric. """ struct MultinomialDoubleStochastic{T} <: AbstractMultinomialDoublyStochastic size::T @@ -60,21 +62,12 @@ end Checks whether `p` is a valid point on the [`MultinomialDoubleStochastic`](@ref)`(n)` `M`, i.e. is a matrix with positive entries whose rows and columns sum to one. """ -function check_point( - M::MultinomialDoubleStochastic, - p::T; - atol::Real=sqrt(prod(representation_size(M))) * eps(real(float(number_eltype(T)))), - kwargs..., -) where {T} +function check_point(M::MultinomialDoubleStochastic, p::T; kwargs...) where {T} n = get_parameter(M.size)[1] - r = sum(p, dims=2) - if !isapprox(norm(r - ones(n, 1)), 0.0; atol=atol, kwargs...) - return DomainError( - r, - "The point $(p) does not lie on $M, since its rows do not sum up to one.", - ) - end - return nothing + s = check_point(MultinomialMatrices(n, n), p; kwargs...) + isnothing(s) && return s + s2 = check_point(MultinomialMatrices(n, n), p'; kwargs...) + return s2 end @doc raw""" check_vector(M::MultinomialDoubleStochastic p, X; kwargs...) @@ -83,21 +76,11 @@ Checks whether `X` is a valid tangent vector to `p` on the [`MultinomialDoubleSt This means, that `p` is valid, that `X` is of correct dimension and sums to zero along any column or row. """ -function check_vector( - M::MultinomialDoubleStochastic, - p, - X::T; - atol::Real=sqrt(prod(representation_size(M))) * eps(real(float(number_eltype(T)))), - kwargs..., -) where {T} - r = sum(X, dims=2) # check for stochastic rows - if !isapprox(norm(r), 0.0; atol=atol, kwargs...) - return DomainError( - r, - "The matrix $(X) is not a tangent vector to $(p) on $(M), since its rows do not sum up to zero.", - ) - end - return nothing +function check_vector(M::MultinomialDoubleStochastic, p, X::T; kwargs...) where {T} + s = check_vector(MultinomialMatrices(n, n), p, X; kwargs...) + isnothing(s) && return s + s2 = check_vector(MultinomialMatrices(n, n), p, X'; kwargs...) + return s end function get_embedding(::MultinomialDoubleStochastic{TypeParameter{Tuple{n}}}) where {n} @@ -134,26 +117,30 @@ end Project `Y` onto the tangent space at `p` on the [`MultinomialDoubleStochastic`](@ref) `M`, return the result in `X`. The formula reads + ````math \operatorname{proj}_p(Y) = Y - (α\mathbf{1}_n^{\mathrm{T}} + \mathbf{1}_nβ^{\mathrm{T}}) ⊙ p, ```` -where $⊙$ denotes the Hadamard or elementwise product and $\mathbb{1}_n$ is the vector of length $n$ containing ones. -The two vectors $α,β ∈ ℝ^{n×n}$ are computed as a solution (typically using the left pseudo inverse) of + +where ``⊙`` denotes the Hadamard or elementwise product and ``\mathbb{1}_n`` is the vector of length ``n`` containing ones. +The two vectors ``α,β ∈ ℝ^{n×n}`` are computed as a solution (typically using the left pseudo inverse) of + ````math \begin{pmatrix} I_n & p\\p^{\mathrm{T}} & I_n \end{pmatrix} \begin{pmatrix} α\\ β\end{pmatrix} = \begin{pmatrix} Y\mathbf{1}\\Y^{\mathrm{T}}\mathbf{1}\end{pmatrix}, ```` -where $I_n$ is the $n×n$ unit matrix and $\mathbf{1}_n$ is the vector of length $n$ containing ones. + +where ``I_n`` is the ``n×n`` unit matrix and ``\mathbf{1}_n`` is the vector of length ``n`` containing ones. """ project(::MultinomialDoubleStochastic, ::Any, ::Any) function project!(M::MultinomialDoubleStochastic, X, p, Y) n = get_parameter(M.size)[1] - ζ = [I p; p I] \ [sum(Y, dims=2); sum(Y, dims=1)'] # Formula (25) from 1802.02628 - return X .= Y .- (repeat(ζ[1:n], 1, 3) .+ repeat(ζ[(n + 1):end]', 3, 1)) .* p + ζ = [I p; p' I] \ [sum(Y, dims=2); sum(Y, dims=1)'] # Formula (25) from 1802.02628 + return X .= Y .- (repeat(ζ[1:n], 1, n) .+ repeat(ζ[(n + 1):end]', n, 1)) .* p end @doc raw""" @@ -206,11 +193,46 @@ function representation_size(M::MultinomialDoubleStochastic) return (n, n) end +@doc raw""" + rand(::MultinomialDoubleStochastic; vector_at=nothing, σ::Real=1.0, kwargs...) + +Generate random points on the [`MultinomialDoubleStochastic`](@ref) manifold +or tangent vectors at the point `vector_at` if that is not `nothing`. + +Let ``n×n`` denote the matrix dimension of the [`MultinomialDoubleStochastic`](@ref). + +When `vector_at` is nothing, this is done by generating a random matrix`rand(n,n)` +with positive entries and projecting it onto the manifold. The `kwargs...` are +passed to this projection. + +When `vector_at` is not `nothing`, a random matrix in the ambient space is generated +and projected onto the tangent space +""" +rand(::MultinomialDoubleStochastic; σ::Real=1.0) + +function Random.rand!( + rng::AbstractRNG, + M::MultinomialDoubleStochastic, + pX; + vector_at=nothing, + σ::Real=one(real(eltype(pX))), + kwargs..., +) + rand!(rng, pX) + pX .*= σ + if vector_at === nothing + project!(M, pX, pX; kwargs...) + else + project!(M, pX, vector_at, pX) + end + return pX +end + @doc raw""" retract(M::MultinomialDoubleStochastic, p, X, ::ProjectionRetraction) -compute a projection based retraction by projecting $p\odot\exp(X⨸p)$ back onto the manifold, -where $⊙,⨸$ are elementwise multiplication and division, respectively. Similarly, $\exp$ +compute a projection based retraction by projecting ``p\odot\exp(X⨸p)`` back onto the manifold, +where ``⊙,⨸`` are elementwise multiplication and division, respectively. Similarly, ``\exp`` refers to the elementwise exponentiation. """ retract(::MultinomialDoubleStochastic, ::Any, ::Any, ::ProjectionRetraction) diff --git a/src/manifolds/MultinomialSymmetric.jl b/src/manifolds/MultinomialSymmetric.jl index ae89797cd4..e59af604c0 100644 --- a/src/manifolds/MultinomialSymmetric.jl +++ b/src/manifolds/MultinomialSymmetric.jl @@ -1,7 +1,7 @@ @doc raw""" - MultinomialSymmetric{T} <: AbstractMultinomialDoublyStochastic{N} + MultinomialSymmetric{T} <: AbstractMultinomialDoublyStochastic -The multinomial symmetric matrices manifold consists of all symmetric $n×n$ matrices with +The multinomial symmetric matrices manifold consists of all symmetric ``n×n`` matrices with positive entries such that each column sums to one, i.e. ````math @@ -13,7 +13,7 @@ positive entries such that each column sums to one, i.e. \end{aligned} ```` -where $\mathbf{1}_n$ is the vector of length $n$ containing ones. +where ``\mathbf{1}_n`` is the vector of length ``n`` containing ones. It is modeled as [`IsIsometricEmbeddedManifold`](https://juliamanifolds.github.io/ManifoldsBase.jl/stable/decorator.html#ManifoldsBase.IsIsometricEmbeddedManifold). via the [`AbstractMultinomialDoublyStochastic`](@ref) type, since it shares a few functions @@ -29,7 +29,7 @@ X\mathbf{1}_n = \mathbf{0}_n \bigr\}, ```` -where $\mathbf{0}_n$ is the vector of length $n$ containing zeros. +where ``\mathbf{0}_n`` is the vector of length ``n`` containing zeros. More details can be found in Section IV [DouikHassibi:2019](@cite). @@ -37,7 +37,7 @@ More details can be found in Section IV [DouikHassibi:2019](@cite). MultinomialSymmetric(n) -Generate the manifold of matrices $\mathbb R^{n×n}$ that are doubly stochastic and symmetric. +Generate the manifold of matrices ``\mathbb R^{n×n}`` that are doubly stochastic and symmetric. """ struct MultinomialSymmetric{T} <: AbstractMultinomialDoublyStochastic size::T @@ -56,22 +56,10 @@ i.e. is a symmetric matrix with positive entries whose rows sum to one. """ function check_point(M::MultinomialSymmetric, p; kwargs...) n = get_parameter(M.size)[1] - s = check_point(SymmetricMatrices(n, ℝ), p) + s = check_point(SymmetricMatrices(n, ℝ), p; kwargs...) isnothing(s) && return s - r = sum(p, dims=2) - if !isapprox(r, ones(n, 1); kwargs...) - return DomainError( - r, - "The point $(p) does not lie on $M, since its rows do not sum up to one.", - ) - end - if !(minimum(p) > 0) || !(maximum(p) < 1) - return DomainError( - minimum(p), - "The point $(p) does not lie on $M, since at least one of its entries is nonpositive.", - ) - end - return nothing + s2 = check_point(MultinomialMatrices(n, n), p; kwargs...) + return s2 end @doc raw""" check_vector(M::MultinomialSymmetric p, X; kwargs...) @@ -84,14 +72,8 @@ function check_vector(M::MultinomialSymmetric, p, X; kwargs...) n = get_parameter(M.size)[1] s = check_vector(SymmetricMatrices(n, ℝ), p, X; kwargs...) isnothing(s) && return s - r = sum(X, dims=2) # due to symmetry, we only have to check columns - if !isapprox(r, zeros(n); kwargs...) - return DomainError( - r, - "The matrix $(X) is not a tangent vector to $(p) on $(M), since its columns/rows do not sum up to zero.", - ) - end - return nothing + s2 = check_vector(MultinomialMatrices(n, n), p, X) + return s2 end embed!(::MultinomialSymmetric, q, p) = copyto!(q, p) @@ -130,12 +112,14 @@ end project(M::MultinomialSymmetric, p, Y) Project `Y` onto the tangent space at `p` on the [`MultinomialSymmetric`](@ref) `M`, return the result in `X`. -The formula reads + +The formula from [DouikHassibi:2019](@cite), Sec. VI reads + ````math \operatorname{proj}_p(Y) = Y - (α\mathbf{1}_n^{\mathrm{T}} + \mathbf{1}_n α^{\mathrm{T}}) ⊙ p, ```` -where $⊙$ denotes the Hadamard or elementwise product and $\mathbb{1}_n$ is the vector of length $n$ containing ones. -The two vector $α ∈ ℝ^{n×n}$ is given by solving +where ``⊙`` denotes the Hadamard or elementwise product and ``\mathbb{1}_n`` is the vector of length ``n`` containing ones. +The two vector ``α ∈ ℝ^{n×n}`` is given by solving ````math (I_n+p)α = Y\mathbf{1}, ```` @@ -148,6 +132,41 @@ function project!(::MultinomialSymmetric, X, p, Y) return X .= Y .- (repeat(α, 1, 3) .+ repeat(α', 3, 1)) .* p end +@doc raw""" + rand(::MultinomialSymmetric; vector_at=nothing, σ::Real=1.0, kwargs...) + +Generate random points on the [`MultinomialSymmetric`](@ref) manifold +or tangent vectors at the point `vector_at` if that is not `nothing`. + +Let ``n×n`` denote the matrix dimension of the [`MultinomialSymmetric`](@ref). + +When `vector_at` is nothing, this is done by generating a random matrix`rand(n,n)` +with positive entries and projecting it onto the manifold. The `kwargs...` are +passed to this projection. + +When `vector_at` is not `nothing`, a random matrix in the ambient space is generated +and projected onto the tangent space +""" +rand(::MultinomialSymmetric; σ::Real=1.0) + +function Random.rand!( + rng::AbstractRNG, + M::MultinomialSymmetric, + pX; + vector_at=nothing, + σ::Real=one(real(eltype(pX))), + kwargs..., +) + rand!(rng, pX) + pX .*= σ + if vector_at === nothing + project!(M, pX, pX; kwargs...) + else + project!(M, pX, vector_at, pX) + end + return pX +end + function representation_size(M::MultinomialSymmetric) n = get_parameter(M.size)[1] return (n, n) @@ -156,8 +175,8 @@ end @doc raw""" retract(M::MultinomialSymmetric, p, X, ::ProjectionRetraction) -compute a projection based retraction by projecting $p\odot\exp(X⨸p)$ back onto the manifold, -where $⊙,⨸$ are elementwise multiplication and division, respectively. Similarly, $\exp$ +compute a projection based retraction by projecting ``p\odot\exp(X⨸p)`` back onto the manifold, +where ``⊙,⨸`` are elementwise multiplication and division, respectively. Similarly, ``\exp`` refers to the elementwise exponentiation. """ retract(::MultinomialSymmetric, ::Any, ::Any, ::ProjectionRetraction) diff --git a/src/manifolds/MultinomialSymmetricPoitiveDefinite.jl b/src/manifolds/MultinomialSymmetricPoitiveDefinite.jl new file mode 100644 index 0000000000..5e3bacfedf --- /dev/null +++ b/src/manifolds/MultinomialSymmetricPoitiveDefinite.jl @@ -0,0 +1,52 @@ +@doc raw""" + MultinomialSymmetricPoitiveDefinite <: AbstractMultinomialDoublyStochastic + +The symmetric positive definite multinomial matrices manifold consists of all +symmetric ``n×n`` matrices with positive eigenvalues, and +positive entries such that each column sums to one, i.e. + +````math +\begin{aligned} +\mathcal{SP}^+(n) \coloneqq \bigl\{ + p ∈ ℝ^{n×n}\ \big|\ &p_{i,j} > 0 \text{ for all } i=1,…,n, j=1,…,m,\\ +& p^\mathrm{T} = p,\\ +& p\mathbf{1}_n = \mathbf{1}_n\\ +a^\mathrm{T}pa > 0 \text{ for all } a ∈ ℝ^{n}\backslash\{\mathbf{0}_n\} +\bigr\}, +\end{aligned} +```` + +where ``\mathbf{1}_n`` and ``\mathbr{0}_n`` are the vectors of length ``n`` +containing ones and zeros, respectively. + +# Constructor + + MultinomialSymmetricPositiveDefinite(n) + +Generate the manifold of matrices ``\mathbb R^{n×n}`` that are doubly stochastic and symmetric. + +""" +struct MultinomialSymmetricPositiveDefinite{T} <: AbstractMultinomialDoublyStochastic + size::T +end + +function MultinomialSymmetricPositiveDefinite(n::Int; parameter::Symbol=:type) + size = wrap_type_parameter(parameter, (n,)) + return MultinomialSymmetricPositiveDefinite{typeof(size)}(size) +end + +function check_point(M::MultinomialSymmetricPositiveDefinite, p; kwargs...) + n = get_parameter(M.size)[1] + s = check_point(SymmetricPositiveDefinite(n), p; kwargs...) + isnothing(s) && return s + s2 = check_point(MultinomialMatrices(n, n), p; kwargs...) + return s2 +end + +function check_vector(M::MultinomialSymmetricPositiveDefinite, p, X; kwargs...) + n = get_parameter(M.size)[1] + s = check_vector(SymmetricPositiveDefinite(n), p, X; kwargs...) + isnothing(s) && return s + s2 = check_vector(MultinomialMatrices(n, n), p, X) + return s2 +end