From 48994417d6b385bafb291354e8f7b92dee95e608 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Fri, 20 Sep 2024 16:52:15 +0200 Subject: [PATCH 1/5] Formalize Kendall Tau and Spearman Rho Interfaces (#222) * Add bivariate gaussian kendall tau * add bivariate student kendall tau * Add williamsongenerator kendall tau * add subsetcopula kendall tau * Define `StatsBase.corkendall` as the outside interface for the matrix of bivariate kendall taus, and `Copulas.\tau` for the multivariate ones. * modify the docs for kendall taus * add corspearman * improve the docs * up * fix test * finish the docs --- docs/src/assets/references.bib | 19 +++++++++ docs/src/dependence_measures.md | 42 +++++++++---------- src/Copula.jl | 22 +++++++++- src/EllipticalCopulas/GaussianCopula.jl | 5 +++ src/EllipticalCopulas/TCopula.jl | 4 ++ src/Generator/WilliamsonGenerator.jl | 5 ++- src/MiscellaneousCopulas/EmpiricalCopula.jl | 3 +- src/SubsetCopula.jl | 7 ++-- .../WilliamsonFromFrailty.jl | 4 +- test/kendall_tau_notnan.jl | 4 +- 10 files changed, 82 insertions(+), 33 deletions(-) diff --git a/docs/src/assets/references.bib b/docs/src/assets/references.bib index e2d4e96a..d88d5578 100644 --- a/docs/src/assets/references.bib +++ b/docs/src/assets/references.bib @@ -845,4 +845,23 @@ @book{mai2014financial author={Mai, Jan-Frederik and Scherer, Matthias}, year={2014}, publisher={Springer} +} + +@article{fang2002meta, + title={The meta-elliptical distributions with given marginals}, + author={Fang, Hong-Bin and Fang, Kai-Tai and Kotz, Samuel}, + journal={Journal of multivariate analysis}, + volume={82}, + number={1}, + pages={1--16}, + year={2002}, + publisher={Elsevier} +} +@incollection{lindskog2003kendall, + title={Kendall’s tau for elliptical distributions}, + author={Lindskog, Filip and McNeil, Alexander and Schmock, Uwe}, + booktitle={Credit risk: Measurement, evaluation and management}, + pages={149--156}, + year={2003}, + publisher={Springer} } \ No newline at end of file diff --git a/docs/src/dependence_measures.md b/docs/src/dependence_measures.md index 5a15946a..64fe9b45 100644 --- a/docs/src/dependence_measures.md +++ b/docs/src/dependence_measures.md @@ -1,43 +1,39 @@ # Measures of dependency -Although the copula is an object that summarizes completely the dependence structure of any random vector, it is an infinite dimensional object and the interpretation of its properties can be difficult when the dimension gets high. Therefore, the literature has come up with some quantification of the dependence structure that might be used as univariate summaries, of course imperfect, of certain properties of the copula at hand. +Although Copulas summarize completely the dependence structure of corresponding random vector, it is an infinite dimensional object and the interpretation of its properties can be difficult when the dimension gets high. Therefore, the literature has come up with some quantification of the dependence structure that might be used as univariate summaries, of course imperfect, of certain properties of the copula at hand. +## Kendall's τ and Spearman's ρ: bivariate and multivariate cases -!!! note "Unfinished work" - Unfortunately these dependence measures are not yet well-specified in the package and their implementation is experimental for the moment. These functions might change in the future, in particular see https://github.com/lrnv/Copulas.jl/issues/134 for future improvements. - - -## Kendall's Tau - -> **Definition (Kendall' τ):** For a copula $C$ with a density $c$, Kendall's τ is defined as: +> **Definition (Kendall' τ):** For a copula $C$ with a density $c$, **whatever its dimension $d$**, its Kendall's τ is defined as: > >$$\tau = 4 \int C(\bm u) \, c(\bm u) \;d\bm u -1$$ -Kendall's tau can be obtained through `τ(C::Copula)`. Its value only depends on the dependence structure and not the marginals. +> **Definition (Spearman's ρ):** For a copula $C$ with a density $c$, **whatever its dimension $d$**, the Spearman's ρ is defined as: +> +> $$\rho = 12 \int C(\bm u) d\bm u -3.$$ -!!! warn "Multivariate case" - There exists several multivariate extensions of Kendall's tau. The one implemented here is the one we just defined what ever the dimension $d$, be careful as the normalization might differ from other places in the literature. +These two dependence measures make more sense in the bivariate case than in other cases, and therefore we sometimes refer to the Kendall's matrix or the Spearman's matrix for the collection of bivariate coefficients associated to a multivariate copula. We thus provide two different interfaces, one internal through `Copulas.τ(C::Copula)` and `Copulas.ρ(C::Copula)`, providing true multivariate Kendall taus and Spearman rhos, and one implementing `StatsBase.corkendall(C::Copula)` and `StatsBase.corspearman(C::Copula)` for matrices of bivariate dependence measures. +Thus, for a given copula `C`, its Kendall's tau can be obtained through `τ(C::Copula)` and its Spearman's Rho through `ρ(C::Copula)`. +In the literature however, for multivariate copulas, the matrices of bivariate Kendall taus and bivariate Spearman rhos are sometimes used, and this is these matrices that are obtained by `StatsBase.corkendall(data)` and `StatsBase.corspearman(data)` where `data::Matrix{n,d}` is a matrix of observations. The corresponding theoretical values for copulas in this package can be obtained through the `StatsBase.corkendall(C::Copula)` and `StatsBase.corspearman(C::Copula)` interface. -## Spearman's Rho -> **Definition (Spearman's ρ):** For a copula $C$ with a density $c$, the Spearman's ρ is defined as: -> -> $$\rho = 12 \int C(\bm u) d\bm u -3.$$ +!!! note "Specific values of tau and rho" + Kendall's $\tau$ and Spearman's $\rho$ have values between -1 and 1, and are -1 in case of complete anticomonotony and 1 in case of comonotony. Moreover, they are 0 in case of independence. Moreover, their values only depends on the dependence structure and not the marginals. This is why we say that they measure the 'strength' of the dependency. -Spearman's Rho can be obtained through `ρ(C::Copula)`. Its value only depends on the dependence structure and not the marginals. +Many copula estimators are based on these coefficients, see e.g., [genest2011,fredricks2007,derumigny2017](@cite). -!!! warn "Multivariate case" - There exists several multivariate extensions of Spearman's rho. The one implemented here is the one we just defined what ever the dimension $d$, be careful as the normalization might differ from other places in the literature. +A few remarks on the state of the implementation: -!!! note "Specific values of tau and rho" - Kendall's $\tau$ and Spearman's $\rho$ have values between -1 and 1, and are -1 in case of complete anticomonotony and 1 in case of comonotony. Moreover, they are 0 in case of independence. This is - why we say that they measure the 'strength' of the dependency. +* Bivariate elliptical cases use $\tau = asin(\rho) * 2 / \pi$ where $\rho$ is the spearman correlation, as soon as the radial part does not have atoms. See [fang2002meta](@cite) for historical credits and [lindskog2003kendall](@cite) for a good review. +* Many Archimedean copulas have specific formulas for their Kendall tau's, but generic ones use [mcneil2009](@cite). +* Generic copulas use directly the upper formula. +* Estimation is done for some copulas wia inversion of Kendall's tau or Spearman's rho. -!!! tip "More-that-bivariate cases" - These two dependence measures make more sense in the bivariate case than in other cases, and therefore we sometimes refer to the Kendall's matrix or the Spearman's matrix for the collection of bivariate coefficients associated to a multivariate copula. Many copula estimators are based on these coefficients, see e.g., [genest2011,fredricks2007,derumigny2017](@cite). +!!! note "Spearman's rho: work in progress" + If most of the efficient family-specific formulas for Kendall's tau are already implemented in the package, Spearman's $\rho$'s tend to leverage the generic (slow) implementation much more. If you feel like a specific method for a certain copula is missing, do not hesitate to open an issue ! ## Tail dependency diff --git a/src/Copula.jl b/src/Copula.jl index 5b4c8a36..29f8246a 100644 --- a/src/Copula.jl +++ b/src/Copula.jl @@ -36,9 +36,29 @@ function ρ(C::Copula{d}) where d end function τ(C::Copula) F(x) = Distributions.cdf(C,x) - r = Distributions.expectation(F,C) + r = Distributions.expectation(F,C; nsamples=10^4) return 4*r-1 end +function StatsBase.corkendall(C::Copula{d}) where d + # returns the matrix of bivariate kendall taus. + K = zeros(d,d) + for i in 1:d + for j in 1:d + K[i,j] = i==j ? 1 : τ(SubsetCopula(C::Copula{d},(i,j))) + end + end + return K +end +function StatsBase.corspearman(C::Copula{d}) where d + # returns the matrix of bivariate spearman rhos. + K = zeros(d,d) + for i in 1:d + for j in 1:d + K[i,j] = i==j ? 1 : ρ(SubsetCopula(C::Copula{d},(i,j))) + end + end + return K +end function measure(C::CT, u,v) where {CT<:Copula} # Computes the value of the cdf at each corner of the hypercube [u,v] diff --git a/src/EllipticalCopulas/GaussianCopula.jl b/src/EllipticalCopulas/GaussianCopula.jl index 54b81206..903c40c5 100644 --- a/src/EllipticalCopulas/GaussianCopula.jl +++ b/src/EllipticalCopulas/GaussianCopula.jl @@ -63,3 +63,8 @@ function _cdf(C::CT,u) where {CT<:GaussianCopula} lb = repeat([T(-Inf)],d) return MvNormalCDF.mvnormcdf(μ, C.Σ, lb, x)[1] end + +# Kendall tau of bivariate gaussian: +# Theorem 3.1 in Fang, Fang, & Kotz, The Meta-elliptical Distributions with Given Marginals Journal of Multivariate Analysis, Elsevier, 2002, 82, 1–16 +τ(C::GaussianCopula{2,MT}) where MT = 2*asin(C.Σ[1,2])/π + diff --git a/src/EllipticalCopulas/TCopula.jl b/src/EllipticalCopulas/TCopula.jl index cc51cb88..f35f3bf8 100644 --- a/src/EllipticalCopulas/TCopula.jl +++ b/src/EllipticalCopulas/TCopula.jl @@ -55,3 +55,7 @@ function Distributions.fit(::Type{CT},u) where {CT<:TCopula} df = N.df return TCopula(df,Σ) end + +# Kendall tau of bivariate student: +# Lindskog, F., McNeil, A., & Schmock, U. (2003). Kendall’s tau for elliptical distributions. In Credit risk: Measurement, evaluation and management (pp. 149-156). Heidelberg: Physica-Verlag HD. +τ(C::TCopula{2,MT}) where MT = 2*asin(C.Σ[1,2])/π \ No newline at end of file diff --git a/src/Generator/WilliamsonGenerator.jl b/src/Generator/WilliamsonGenerator.jl index 178579b8..a2309910 100644 --- a/src/Generator/WilliamsonGenerator.jl +++ b/src/Generator/WilliamsonGenerator.jl @@ -54,4 +54,7 @@ function williamson_dist(G::WilliamsonGenerator, d) # what about d < G.d ? Mayeb we can do some frailty stuff ? return WilliamsonTransforms.𝒲₋₁(t -> ϕ(G,t),d) end -ϕ(G::WilliamsonGenerator, t) = WilliamsonTransforms.𝒲(G.X,G.d)(t) \ No newline at end of file +ϕ(G::WilliamsonGenerator, t) = WilliamsonTransforms.𝒲(G.X,G.d)(t) + +# McNeil & Neshelova 2009 +τ(G::WilliamsonGenerator) = 4*Distributions.expectation(x -> Copulas.ϕ(G,x), Copulas.williamson_dist(G,2))-1 \ No newline at end of file diff --git a/src/MiscellaneousCopulas/EmpiricalCopula.jl b/src/MiscellaneousCopulas/EmpiricalCopula.jl index 3830d27e..7986c74c 100644 --- a/src/MiscellaneousCopulas/EmpiricalCopula.jl +++ b/src/MiscellaneousCopulas/EmpiricalCopula.jl @@ -43,4 +43,5 @@ function Distributions._rand!(rng::Distributions.AbstractRNG, C::EmpiricalCopula end function Distributions.fit(::Type{CT},u) where {CT <: EmpiricalCopula} return EmpiricalCopula(u) -end \ No newline at end of file +end +StatsBase.corkendall(C::EmpiricalCopula) = StatsBase.corkendall(C.u) \ No newline at end of file diff --git a/src/SubsetCopula.jl b/src/SubsetCopula.jl index 48f6da26..fe948468 100644 --- a/src/SubsetCopula.jl +++ b/src/SubsetCopula.jl @@ -42,11 +42,10 @@ end # A few specialized constructors: SubsetCopula(C::GaussianCopula,dims) = length(dims) == 1 ? Distributions.Uniform() : GaussianCopula(C.Σ[collect(dims),collect(dims)]) SubsetCopula(C::TCopula{d,df,MT},dims) where {d,df,MT} = length(dims) == 1 ? Distributions.Uniform() : TCopula(df, C.Σ[collect(dims),collect(dims)]) -SubsetCopula(C::ArchimedeanCopula{d,TG},dims) where {d,TG} = length(dims) == 1 ? Distributions.Uniform() : ArchimedeanCopula(length(dims), C.G) # in particular for the independence this will work. - -# We could add a few more for performance if needed: EmpiricalCopula, others... - +SubsetCopula(C::ArchimedeanCopula{d,TG},dims) where {d,TG} = length(dims) == 1 ? Distributions.Uniform() : ArchimedeanCopula(length(dims), C.G) # in particular for the independence this will work. +# Kendall tau of bivariate subsets, when the underlying copula is bivariate, should just be kendall tau of the underlying copula. +τ(C::SubsetCopula{2,CT}) where {CT<:Copula{2}} = τ(C.C) """ subsetdims(C::Copula,dims) diff --git a/src/UnivariateDistribution/WilliamsonFromFrailty.jl b/src/UnivariateDistribution/WilliamsonFromFrailty.jl index 19288d7b..053ce270 100644 --- a/src/UnivariateDistribution/WilliamsonFromFrailty.jl +++ b/src/UnivariateDistribution/WilliamsonFromFrailty.jl @@ -22,4 +22,6 @@ function Distributions.pdf(D::WilliamsonFromFrailty{TF,d}, x::Real) where {TF,d} e -> Distributions.pdf(D.frailty_dist,e/x), Distributions.Erlang(d) ) -end \ No newline at end of file +end +Base.minimum(D::WilliamsonFromFrailty{TF,d}) where {TF,d} = 0 +Base.maximum(D::WilliamsonFromFrailty{TF,d}) where {TF,d} = Inf \ No newline at end of file diff --git a/test/kendall_tau_notnan.jl b/test/kendall_tau_notnan.jl index 9486ce07..f6c7150d 100644 --- a/test/kendall_tau_notnan.jl +++ b/test/kendall_tau_notnan.jl @@ -19,7 +19,7 @@ InvGaussianCopula(4,0.05), InvGaussianCopula(3,8.), GaussianCopula([1 0.5; 0.5 1]), - # TCopula(4, [1 0.5; 0.5 1]), # this one takes a while. + TCopula(4, [1 0.5; 0.5 1]), FGMCopula(2,1), MCopula(4), WCopula(2), @@ -28,11 +28,11 @@ SurvivalCopula(ClaytonCopula(2,-0.7),(1,2)), RafteryCopula(2, 0.2), RafteryCopula(3, 0.5), + ArchimedeanCopula(2,i𝒲(LogNormal(),2)), # Others ? Yes probably others too ! ) for C in cops @test !isnan(Copulas.τ(C)) end - @test_broken Copulas.τ(ArchimedeanCopula(2,i𝒲(LogNormal(),2))) # not implemented. end \ No newline at end of file From 5aa54b51066f1e16ed2852ff9613c6d6c4d0ac16 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Sat, 21 Sep 2024 19:45:55 +0200 Subject: [PATCH 2/5] Fix Extreme Values Copulas (#226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix A function in AsymGalambosCopula to improve readability. * refactor: Update AsymLogCopula struct with individual asymmetry parameters + Fix the A function * refactor: Simplify calculation in `A` function in AsymMixedCopula. * Fully rewrite BC2Copula parameters and methods Changed the BC2Copula parameters to 'a' and 'b'. Updated the functions to reflect the new parameters. Added functions τ and ρ for calculations. * refactor: Rename function `ρₛ` to `ρ` in CuadrasAugeCopula. * refactor(GalambosCopula.jl): Improve handling of large parameter values * refactor: Simplify calculation functions in HuslerReissCopula * refactor: Update specific A function of LogCopula * refactor: Rename parameters in MOCopula and update related functions * refactor: Improve spacing in MixedCopula calculation formula * refactor: Remove unused _pdf function in tEVCopula * refactor: Simplify _pdf function and add τ and ρ functions to EVCs --- src/ExtremeValueCopula.jl | 35 ++------- src/ExtremeValueCopulas/AsymGalambosCopula.jl | 13 +--- src/ExtremeValueCopulas/AsymLogCopula.jl | 14 ++-- src/ExtremeValueCopulas/AsymMixedCopula.jl | 6 +- src/ExtremeValueCopulas/BC2Copula.jl | 72 ++++++++----------- src/ExtremeValueCopulas/CuadrasAugeCopula.jl | 2 +- src/ExtremeValueCopulas/GalambosCopula.jl | 4 +- src/ExtremeValueCopulas/HuslerReissCopula.jl | 10 +-- src/ExtremeValueCopulas/LogCopula.jl | 7 +- src/ExtremeValueCopulas/MOCopula.jl | 57 ++++----------- src/ExtremeValueCopulas/MixedCopula.jl | 2 +- src/ExtremeValueCopulas/tEVCopula.jl | 10 --- 12 files changed, 67 insertions(+), 165 deletions(-) diff --git a/src/ExtremeValueCopula.jl b/src/ExtremeValueCopula.jl index 176fe2cd..83c46c14 100644 --- a/src/ExtremeValueCopula.jl +++ b/src/ExtremeValueCopula.jl @@ -90,14 +90,6 @@ function D_B_ℓ(C::ExtremeValueCopula, t::Vector{Float64}, B::Vector{Int}) end # Función PDF para ExtremeValueCopula usando ℓ -function _pdf(C::ExtremeValueCopula, u::AbstractArray{<:Real}) - t = -log.(u) - c = exp(-ℓ(C, t)) - D1 = D_B_ℓ(C, t, [1]) - D2 = D_B_ℓ(C, t, [2]) - D12 = D_B_ℓ(C, t, [1, 2]) - return c * (-D12 + D1 * D2) / (u[1] * u[2]) -end function Distributions._logpdf(C::ExtremeValueCopula, u::AbstractArray{<:Real}) t = -log.(u) c = exp(-ℓ(C, t)) @@ -106,26 +98,12 @@ function Distributions._logpdf(C::ExtremeValueCopula, u::AbstractArray{<:Real}) D12 = D_B_ℓ(C, t, [1, 2]) return log(c) + log(-D12 + D1 * D2) - log(u[1] * u[2]) end -# Definir la función para calcular τ -function τ(C::ExtremeValueCopula) - integrand(x) = begin - a = A(C, x) - da = dA(C, x) - return (x * (1 - x) / a) * da - end - - integrate, _ = QuadGK.quadgk(integrand, 0.0, 1.0) - return integrate -end +# Definir la función para calcular τ and ρ +# Warning: the τ function can be veeeery unstable... it is actually very hard to compute correctly. +# In some case, it simply fails by a lot. +τ(C::ExtremeValueCopula) = QuadGK.quadgk(x -> d²A(C, x) * x * (1 - x) / A(C, x), 0.0, 1.0)[1] +ρ(C::ExtremeValueCopula) = 12 * QuadGK.quadgk(x -> 1 / (1 + A(C, x))^2, 0, 1)[1] - 3 -function ρₛ(C::ExtremeValueCopula) - integrand(x) = 1 / (1 + A(C, x))^2 - - integral, _ = QuadGK.quadgk(integrand, 0, 1) - - ρs = 12 * integral - 3 - return ρs -end # Función para calcular el coeficiente de dependencia en el límite superior function λᵤ(C::ExtremeValueCopula) return 2(1 - A(C, 0.5)) @@ -150,9 +128,6 @@ function Distributions._rand!(rng::Distributions.AbstractRNG, C::ExtremeValueCop u1, u2 = rand(rng, Distributions.Uniform(0,1), 2) z = rand(rng, ExtremeDist(C)) p = probability_z(C, z) - if p < -eps() || p > eps() - p = 0 - end c = rand(rng, Distributions.Bernoulli(p)) w = 0 if c == 1 diff --git a/src/ExtremeValueCopulas/AsymGalambosCopula.jl b/src/ExtremeValueCopulas/AsymGalambosCopula.jl index 986825d0..342e6707 100644 --- a/src/ExtremeValueCopulas/AsymGalambosCopula.jl +++ b/src/ExtremeValueCopulas/AsymGalambosCopula.jl @@ -46,14 +46,7 @@ struct AsymGalambosCopula{P} <: ExtremeValueCopula{P} end function A(C::AsymGalambosCopula, t::Real) - α = C.α - θ = C.θ - - term1 = (θ[1] * t)^(-α) - term2 = (θ[2] * (1 - t))^(-α) - - inner_term = term1 + term2 - - result = 1 - inner_term^(-1 / α) - return result + x₁ = - C.α * log(C.θ[1] * t) + x₂ = - C.α * log(C.θ[2] * (1-t)) + return -expm1(-LogExpFunctions.logaddexp(x₁,x₂) / C.α) end \ No newline at end of file diff --git a/src/ExtremeValueCopulas/AsymLogCopula.jl b/src/ExtremeValueCopulas/AsymLogCopula.jl index 329f75fc..756c3a31 100644 --- a/src/ExtremeValueCopulas/AsymLogCopula.jl +++ b/src/ExtremeValueCopulas/AsymLogCopula.jl @@ -21,7 +21,8 @@ References: """ struct AsymLogCopula{P} <: ExtremeValueCopula{P} α::P # Dependence Parameter - θ::Vector{P} # Asymmetry parameters (size 2) + θ₁::P + θ₂::P function AsymLogCopula(α::P, θ::Vector{P}) where {P} if length(θ) != 2 throw(ArgumentError("The vector θ must have 2 elements for the bivariate case")) @@ -30,15 +31,8 @@ struct AsymLogCopula{P} <: ExtremeValueCopula{P} elseif !(0 <= θ[1] <= 1) || !(0 <= θ[2] <= 1) throw(ArgumentError("All parameters θ must be in the interval [0, 1]")) else - return new{P}(α, θ) + return new{P}(α, θ[1],θ[2]) end end end - -function A(C::AsymLogCopula, t::Real) - α = C.α - θ = C.θ - - A = ((θ[1]^α)*(1-t)^α + (θ[2]^α)*(t^α))^(1/α)+(θ[1]- θ[2])*t + 1 -θ[1] - return A -end \ No newline at end of file +A(C::AsymLogCopula, t::Real) = ((C.θ₁^C.α)*(1-t)^C.α + (C.θ₂^C.α)*(t^C.α))^(1/C.α)+(C.θ₁- C.θ₂)*t + 1 - C.θ₁ \ No newline at end of file diff --git a/src/ExtremeValueCopulas/AsymMixedCopula.jl b/src/ExtremeValueCopulas/AsymMixedCopula.jl index e3b524e2..1dccf385 100644 --- a/src/ExtremeValueCopulas/AsymMixedCopula.jl +++ b/src/ExtremeValueCopulas/AsymMixedCopula.jl @@ -52,8 +52,6 @@ struct AsymMixedCopula{P} <: ExtremeValueCopula{P} end function A(C::AsymMixedCopula, t::Real) - θ = C.θ - - a = θ[2]*t^3 + θ[1]*t^2-(θ[1]+θ[2])*t+1 - return a + θ₁, θ₂ = C.θ[1], C.θ[2] + return θ₂*t^3 + θ₁*t^2 - (θ₁+θ₂)*t + 1 end \ No newline at end of file diff --git a/src/ExtremeValueCopulas/BC2Copula.jl b/src/ExtremeValueCopulas/BC2Copula.jl index 9ffdb7b1..c2eb695e 100644 --- a/src/ExtremeValueCopulas/BC2Copula.jl +++ b/src/ExtremeValueCopulas/BC2Copula.jl @@ -3,75 +3,63 @@ Fields: - - θ1::Real - parameter - - θ1::Real - parameter + - a::Real - parameter + - a::Real - parameter Constructor - BC2Copula(θ1, θ2) + BC2Copula(a, b) -The bivariate BC₂ copula is parameterized by two parameters ``\\theta_{i} \\in [0,1], i=1,2``. It is an Extreme value copula with Pickands dependence function: +The bivariate BC2 copula is parameterized by two parameters ``a,b \\in [0,1]``. It is an Extreme value copula with Pickands dependence function: ```math -A(t) = \\max\\{\\theta_1 t, \\theta_2(1-t) \\} + \\max\\{(1-\\theta_1)t, (1-\\theta_2)(1-t)\\} +A(t) = \\max\\{a t, b (1-t) \\} + \\max\\{(1-a)t, (1-b)(1-t)\\} ``` References: * [mai2011bivariate](@cite) Mai, J. F., & Scherer, M. (2011). Bivariate extreme-value copulas with discrete Pickands dependence measure. Extremes, 14, 311-324. Springer, 2011. """ struct BC2Copula{P} <: ExtremeValueCopula{P} - θ1::P - θ2::P - - function BC2Copula(θ::Vararg{Real}) - if length(θ) !== 2 - throw(ArgumentError("BC2Copula requires only 2 arguments.")) - end - T = promote_type(typeof(θ[1]), typeof(θ[2])) - θ1, θ2 = T(θ[1]), T(θ[2]) - - if !(0 <= θ1 <= 1) || !(0 <= θ2 <= 1) - throw(ArgumentError("All θ parameters must be in [0,1]")) + a::P + b::P + function BC2Copula(a,b) + T = promote_type(typeof(a), typeof(b)) + if !(0 <= a <= 1) || !(0 <= b <= 1) + throw(ArgumentError("Both parameters a and b must be in [0,1]")) end - return new{T}(θ1, θ2) + return new{T}(T(a), T(b)) end end function ℓ(C::BC2Copula, t::Vector) - θ1, θ2 = C.θ1, C.θ2 + a, b = C.a, C.b t₁, t₂ = t - return max(θ1*t₁, θ2*t₂) + max((1-θ1)*t₁, (1-θ2)*t₂) + return max(a*t₁, b*t₂) + max((1-a)*t₁, (1-b)*t₂) end function A(C::BC2Copula, t::Real) - θ1, θ2 = C.θ1, C.θ2 - return max(θ1*t, θ2*(1-t)) + max((1-θ1)*t, (1-θ2)*(1-t)) + a, b = C.a, C.b + return max(a*t, b*(1-t)) + max((1-a)*t, (1-b)*(1-t)) end function dA(C::BC2Copula, t::Float64) - θ1, θ2 = C.θ1, C.θ2 - - # Conditions for the derivative of the first part - if θ1*t >= θ2*(1-t) - f1_derivative = θ1 - else - f1_derivative = -θ2 - end - - # Conditions for the derivative of the second part - if (1-θ1)*t >= (1-θ2)*(1-t) - f2_derivative = 1-θ1 - else - f2_derivative = -(1-θ2) - end - - return f1_derivative + f2_derivative + a, b = C.a, C.b + der1 = a*t >= b*(1-t) ? a : -b + der2 = (1-a)*t >= (1-b)*(1-t) ? -a : -(1-b) + return der1 + der2 end function Distributions._rand!(rng::Distributions.AbstractRNG, C::BC2Copula, u::AbstractVector{T}) where {T<:Real} - θ1, θ2 = C.θ1, C.θ2 + a, b = C.a, C.b v1, v2 = rand(rng, Distributions.Uniform(0,1), 2) - u[1] = max(v1^(1/θ1),v2^(1/(1-θ1))) - u[2] = max(v1^(1/θ2),v2^(1/(1-θ2))) + u[1] = max(v1^(1/a), v2^(1/(1-a))) + u[2] = max(v1^(1/b), v2^(1/(1-b))) return u +end + +τ(C::BC2Copula) = 1 - abs(C.a - C.b) + +function ρ(C::BC2Copula) + a,b = C.a, C.b + return 2 * (a + b + a*b + max(a,b) - 2a^2 - 2b^2) / (3 - a - b - min(a,b)) / (a + b + max(a,b)) end \ No newline at end of file diff --git a/src/ExtremeValueCopulas/CuadrasAugeCopula.jl b/src/ExtremeValueCopulas/CuadrasAugeCopula.jl index 7e8f1419..a6a16617 100644 --- a/src/ExtremeValueCopulas/CuadrasAugeCopula.jl +++ b/src/ExtremeValueCopulas/CuadrasAugeCopula.jl @@ -39,7 +39,7 @@ dA(C::CuadrasAugeCopula, t::Real) = t <= 0.5 ? -C.θ : C.θ τ(C::CuadrasAugeCopula) = C.θ/(2-C.θ) -ρₛ(C::CuadrasAugeCopula) = (3*C.θ)/(4-C.θ) +ρ(C::CuadrasAugeCopula) = (3*C.θ)/(4-C.θ) # specific ℓ específica of Cuadras-Augé Copula function ℓ(C::CuadrasAugeCopula, t::Vector) diff --git a/src/ExtremeValueCopulas/GalambosCopula.jl b/src/ExtremeValueCopulas/GalambosCopula.jl index 4e2232c1..b9169914 100644 --- a/src/ExtremeValueCopulas/GalambosCopula.jl +++ b/src/ExtremeValueCopulas/GalambosCopula.jl @@ -27,7 +27,7 @@ struct GalambosCopula{P} <: ExtremeValueCopula{P} θ::P # Copula parameter function GalambosCopula(θ) if θ > 19.5 - @warn "The parameter θ = $(θ) is large, which may lead to numerical instability in any functions. Consider regularizing the input." + @info "GalambosCopula(θ=$(θ)): θ is large, which may lead to numerical issues." end if θ < 0 throw(ArgumentError("Theta must be >= 0")) @@ -41,7 +41,7 @@ struct GalambosCopula{P} <: ExtremeValueCopula{P} end end -A(C::GalambosCopula, t::Real) = 1 - (t^(-C.θ) + (1 - t)^(-C.θ))^(-1/C.θ) +A(C::GalambosCopula, t::Real) = -expm1(-LogExpFunctions.logaddexp(-C.θ*log(t),-C.θ*log(1-t))/C.θ) # This auxiliary function helps determine if we need binary search or not in the generation of random samples function needs_binary_search(C::GalambosCopula) return C.θ > 19.5 diff --git a/src/ExtremeValueCopulas/HuslerReissCopula.jl b/src/ExtremeValueCopulas/HuslerReissCopula.jl index 30f5a5cb..9e1d3f30 100644 --- a/src/ExtremeValueCopulas/HuslerReissCopula.jl +++ b/src/ExtremeValueCopulas/HuslerReissCopula.jl @@ -50,11 +50,7 @@ function A(H::HuslerReissCopula, t::Real) θ = H.θ term1 = t * Distributions.cdf(Distributions.Normal(), θ^(-1) + 0.5 * θ * log(t / (1 - t))) term2 = (1 - t) * Distributions.cdf(Distributions.Normal(), θ^(-1) + 0.5 * θ * log((1 - t) / t)) - - a = term1 + term2 - - - return a + return term1 + term2 end function dA(H::HuslerReissCopula, t::Real) @@ -66,7 +62,5 @@ function dA(H::HuslerReissCopula, t::Real) dA_term2 = -Distributions.cdf(Distributions.Normal(), θ^(-1) + 0.5 * θ * log((1 - t) / t)) + (1 - t) * Distributions.pdf(Distributions.Normal(), θ^(-1) + 0.5 * θ * log((1 - t) / t)) * (0.5 * θ * (-1 / t - 1 / (1 - t))) - da = dA_term1 + dA_term2 - - return da + return dA_term1 + dA_term2 end \ No newline at end of file diff --git a/src/ExtremeValueCopulas/LogCopula.jl b/src/ExtremeValueCopulas/LogCopula.jl index f9483792..d2731b55 100644 --- a/src/ExtremeValueCopulas/LogCopula.jl +++ b/src/ExtremeValueCopulas/LogCopula.jl @@ -42,8 +42,5 @@ function ℓ(G::LogCopula, t::Vector) t₁, t₂ = t return (t₁^θ + t₂^θ)^(1/θ) end -# # specific A funcion of HuslerReissCopula -function A(C::LogCopula, t::Real) - θ = C.θ - return (t^θ + (1 - t)^θ)^(1/θ) -end \ No newline at end of file +# # specific A funcion of LogCopula +A(C::LogCopula, t::Real) = exp(LogExpFunctions.logaddexp(C.θ*log(t),C.θ*log(1-t))/C.θ) diff --git a/src/ExtremeValueCopulas/MOCopula.jl b/src/ExtremeValueCopulas/MOCopula.jl index 0b74ac19..3b7a8363 100644 --- a/src/ExtremeValueCopulas/MOCopula.jl +++ b/src/ExtremeValueCopulas/MOCopula.jl @@ -3,9 +3,9 @@ Fields: - - λ1::Real - parameter - - λ2::Real - parameter - - λ12::Real - parameter + - λ₁::Real - parameter + - λ₂::Real - parameter + - λ₁₂::Real - parameter Constructor @@ -21,49 +21,22 @@ References: * [mai2012simulating](@cite) Mai, J. F., & Scherer, M. (2012). Simulating copulas: stochastic models, sampling algorithms, and applications (Vol. 4). World Scientific. """ struct MOCopula{P} <: ExtremeValueCopula{P} - λ1::P - λ2::P - λ12::P - - function MOCopula(λ::Vararg{Real}) - if length(λ) !== 3 - throw(ArgumentError("MOCopula requires only 3 arguments.")) - end - - T = promote_type(typeof(λ[1]), typeof(λ[2]), typeof(λ[3])) - λ1, λ2, λ12 = T(λ[1]), T(λ[2]), T(λ[3]) - - if λ1 < 0 || λ2 < 0 || λ12 < 0 + a::P + b::P + function MOCopula(λ₁,λ₂,λ₁₂) + if λ₁ < 0 || λ₂ < 0 || λ₁₂ < 0 throw(ArgumentError("All λ parameters must be >= 0")) end - - return new{T}(λ1, λ2, λ12) + a, b = λ₁ / (λ₁ + λ₁₂), λ₂ / (λ₂ + λ₁₂) + return new{typeof(a)}(a,b) end end - -function A(C::MOCopula, t::Real) - λ1, λ2, λ12 = C.λ1, C.λ2, C.λ12 - return ((λ1 * (1 - t))/(λ1 + λ12)) + ((λ2 * t)/(λ2 + λ12)) + λ12 * max((1-t)/(λ1 + λ12), t/(λ2 + λ12)) -end - -function ℓ(C::MOCopula, t::Vector) - λ1, λ2, λ12 = C.λ1, C.λ2, C.λ12 - t₁, t₂ = t - return ((λ1 * t₂)/(λ1 + λ12)) + ((λ2 * t₁)/(λ2 + λ12)) + λ12 * max(t₂/(λ1 + λ12), t₁/(λ2 + λ12)) -end - -function _cdf(C::MOCopula, u::AbstractArray{<:Real}) - λ1, λ2, λ12 = C.λ1, C.λ2, C.λ12 - u₁, u₂ = u - return min(u₂*u₁^(1 - λ12/(λ1 + λ12)), u₁ * u₂^(1 - λ12/(λ2 + λ12))) -end - +A(C::MOCopula, t::Real) = max(t + (1-t)*C.b, (1-t)+C.a*t) +_cdf(C::MOCopula, u::AbstractArray{<:Real}) = min(u[1]^C.a * u[2], u[1] * u[2]^C.b) function Distributions._rand!(rng::Distributions.AbstractRNG, C::MOCopula, u::AbstractVector{T}) where {T<:Real} - λ1, λ2, λ12 = C.λ1, C.λ2, C.λ12 - r, s, t = rand(rng, Distributions.Uniform(0,1),3) - x = min(-log(r)/λ1, -log(t)/λ12) - y = min(-log(s)/λ2, -log(t)/λ12) - u[1] = exp(-(λ1+λ12)*x) - u[2] = exp(-(λ2+λ12)*y) + r, s, t = -log.(rand(rng,3)) # Exponentials(1) + u[1] = exp(-min(r/(1-C.a), t/C.a)) + u[2] = exp(-min(s/(1-C.b), t/C.b)) return u end +τ(C::MOCopula) = C.a*C.b/(C.a+C.b-C.a*C.b) \ No newline at end of file diff --git a/src/ExtremeValueCopulas/MixedCopula.jl b/src/ExtremeValueCopulas/MixedCopula.jl index 56dd3a44..940a25a1 100644 --- a/src/ExtremeValueCopulas/MixedCopula.jl +++ b/src/ExtremeValueCopulas/MixedCopula.jl @@ -34,4 +34,4 @@ struct MixedCopula{P} <: ExtremeValueCopula{P} end end -A(C::MixedCopula, t::Real) = C.θ*t^2 - C.θ*t + 1 \ No newline at end of file +A(C::MixedCopula, t::Real) = C.θ * t^2 - C.θ * t + 1 \ No newline at end of file diff --git a/src/ExtremeValueCopulas/tEVCopula.jl b/src/ExtremeValueCopulas/tEVCopula.jl index 84b3f56e..b1283636 100644 --- a/src/ExtremeValueCopulas/tEVCopula.jl +++ b/src/ExtremeValueCopulas/tEVCopula.jl @@ -93,16 +93,6 @@ function d²A(C::tEVCopula, t::Real) d2A = (dA_plus - dA_minus) / (2 * h) return d2A end - -# PDF function for ExtremeValueCopula using ℓ -function _pdf(C::tEVCopula, u::AbstractArray{<:Real}) - t = -log.(u) - c = exp(-ℓ(C, t)) - D1 = D_B_ℓ(C, t, [1]) - D2 = D_B_ℓ(C, t, [2]) - D12 = D_B_ℓ(C, t, [1, 2]) - return c * (-D12 + D1 * D2) / (u[1] * u[2]) -end function Distributions._logpdf(C::tEVCopula, u::AbstractArray{<:Real}) t = -log.(u) c = exp(-ℓ(C, t)) From a10edf2af40d9bb36e169d2c818a64e246882bf7 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Sat, 21 Sep 2024 19:49:37 +0200 Subject: [PATCH 3/5] Fix WCopula sampling issue (#227) --- src/ArchimedeanCopula.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ArchimedeanCopula.jl b/src/ArchimedeanCopula.jl index e0532620..e0515a61 100644 --- a/src/ArchimedeanCopula.jl +++ b/src/ArchimedeanCopula.jl @@ -185,7 +185,8 @@ end function Distributions._rand!(rng::Distributions.AbstractRNG, ::ArchimedeanCopula{d,WGenerator}, x::AbstractVector{T}) where {d,T<:Real} @assert d==2 x[1] = rand(rng) - x[2] = 1-x[1] + x[2] = 1-x[1] + return x end function Distributions._rand!(rng::Distributions.AbstractRNG, ::ArchimedeanCopula{d,IndependentGenerator}, A::DenseMatrix{T}) where {T<:Real, d} Random.rand!(rng,A) From 21e139d7ffd362bc7bb7112c3756323dae3ba46c Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Sat, 21 Sep 2024 19:50:13 +0200 Subject: [PATCH 4/5] Fix WilliamsonGenerator Kendall's tau (#230) * feat: Fix kendall tau for WilliamsonGenerator * feat: Add minimum and maximum functions to WilliamsonFromFrailty distribution From ac532bf38dfc24d25509f3ce56f8b58cd96ae169 Mon Sep 17 00:00:00 2001 From: Oskar Laverny Date: Sat, 21 Sep 2024 19:50:18 +0200 Subject: [PATCH 5/5] refactor: Clean up constructor and fix the sampling via stochastic representation for FGMCopula (#228) --- src/MiscellaneousCopulas/FGMCopula.jl | 85 ++++++++++----------------- 1 file changed, 31 insertions(+), 54 deletions(-) diff --git a/src/MiscellaneousCopulas/FGMCopula.jl b/src/MiscellaneousCopulas/FGMCopula.jl index e1f54727..a15a5bb4 100644 --- a/src/MiscellaneousCopulas/FGMCopula.jl +++ b/src/MiscellaneousCopulas/FGMCopula.jl @@ -16,22 +16,19 @@ C(\\boldsymbol{u})=\\prod_{i=1}^{d}u_i \\left[1+ \\sum_{k=2}^{d}\\sum_{1 \\leq j where `` \\bar{u}=1-u``. -More details about Farlie-Gumbel-Morgenstern (FGM) copula are found in : - - Nelsen, Roger B. An introduction to copulas. Springer, 2006. Exercise 3.38. - -We use the stochastic representation of the copula to obtain random samples. - - Blier-Wong, C., Cossette, H., & Marceau, E. (2022). Stochastic representation of FGM copulas using multivariate Bernoulli random variables. Computational Statistics & Data Analysis, 173, 107506. - It has a few special cases: - When d=2 and θ = 0, it is the IndependentCopula. +More details about Farlie-Gumbel-Morgenstern (FGM) copula are found in [nelsen2006](@cite). +We use the stochastic representation from [blier2022stochastic](@cite) to obtain random samples. + References: * [nelsen2006](@cite) Nelsen, Roger B. An introduction to copulas. Springer, 2006. +* [blier2022stochastic](@cite) Blier-Wong, C., Cossette, H., & Marceau, E. (2022). Stochastic representation of FGM copulas using multivariate Bernoulli random variables. Computational Statistics & Data Analysis, 173, 107506. """ -struct FGMCopula{d, Tθ} <: Copula{d} +struct FGMCopula{d, Tθ, Tf} <: Copula{d} θ::Tθ + fᵢ::Tf function FGMCopula(d, θ) vθ = typeof(θ)<:Vector ? θ : [θ] if all(θ .== 0) @@ -40,70 +37,50 @@ struct FGMCopula{d, Tθ} <: Copula{d} # Check first restrictions on parameters any(abs.(vθ) .> 1) && throw(ArgumentError("Each component of the parameter vector must satisfy that |θᵢ| ≤ 1")) length(vθ) != 2^d - d - 1 && throw(ArgumentError("Number of parameters (θ) must match the dimension ($d): 2ᵈ-d-1")) + # Last check: - rez = new{d, typeof(vθ)}(vθ) for epsilon in Base.product(fill([-1, 1], d)...) - if 1 + _reduce_over_combinations(rez,epsilon,prod) < 0 + if 1 + _fgm_red(vθ, epsilon) < 0 throw(ArgumentError("Invalid parameters. The parameters do not meet the condition to be an FGM copula")) end end - return rez + + # Now construct the stochastic representation: + wᵢ = [_fgm_red(vθ, 1 .- 2*Base.reverse(digits(i, base=2, pad=d))) for i in 0:(2^d-1)] + fᵢ = Distributions.DiscreteNonParametric(0:(2^d-1), (1 .+ wᵢ)/2^d) + return new{d, typeof(vθ), typeof(fᵢ)}(vθ, fᵢ) end end Base.eltype(C::FGMCopula) = eltype(C.θ) -function _reduce_over_combinations(C::FGMCopula{d,Tθ}, vector_to_combine, reducer_function) where {d,Tθ} - # This version of the reductor is non-allocative, which is much better in terms of performance. - # Moreover, since $d$ is a type parameter the loop will fold out at compile time :) - rez = zero(eltype(vector_to_combine)) - # Iterate over all possible combinations of k elements, for k = 2, 3, ..., d - i = 1 +function _fgm_red(θ, v) + # This function implements the reduction over combinations of the fgm copula. + # It is non-alocative thus performant :) + rez, d, i = zero(eltype(v)), length(v), 1 for k in 2:d - for indices in Combinatorics.combinations(1:d, k) - rez += C.θ[i] * reducer_function(vector_to_combine[indices]) - i = i+1 - end + for indices in Combinatorics.combinations(1:d, k) + rez += θ[i] * prod(v[indices]) + i = i+1 + end end return rez end -function _cdf(fgm::FGMCopula, u::Vector{T}) where {T} - return prod(u) * (1 + _reduce_over_combinations(fgm, 1 .-u, prod)) -end -function Distributions._logpdf(fgm::FGMCopula, u) - return log1p(_reduce_over_combinations(fgm, 1 .-2u, prod)) -end -function Distributions._rand!(rng::Distributions.AbstractRNG, fgm::FGMCopula{d,Tθ}, x::AbstractVector{T}) where {d,Tθ, T <: Real} - if d == 2 - u = rand(rng, T) - t = rand(rng, T) - a = 1.0 .+ fgm.θ .* (1.0-2.0*u) - b = sqrt.(a.^2 .-4.0 .*(a .-1.0).*t) - v = (2.0 .*t) ./(b .+ a) - x[1] = u - x[2] = v[1] - return x - elseif d > 2 - I = zeros(T,d) - for i in 1:d - term = _reduce_over_combinations(fgm, I, x -> (-1)^sum(x)) - I[i] = rand(rng) < (1 / 2^d) * (1 + term) - end - V0 = rand(rng, d) - V1 = rand(rng, d) - for j in 1:d - U_j = 1-sqrt(1-V0[j])*(1-V1[j])^(I[j]) - x[j] = U_j - end - return x - end +_cdf(fgm::FGMCopula, u::Vector{T}) where {T} = prod(u) * (1 + _fgm_red(fgm.θ, 1 .-u)) +Distributions._logpdf(fgm::FGMCopula, u) = log1p(_fgm_red(fgm.θ, 1 .-2u)) +function Distributions._rand!(rng::Distributions.AbstractRNG, fgm::FGMCopula{d, Tθ, Tf}, x::AbstractVector{T}) where {d,Tθ, Tf, T <: Real} + I = Base.reverse(digits(rand(rng,fgm.fᵢ), base=2, pad=d)) + V₀ = rand(rng, d) + V₁ = rand(rng, d) + x .= 1 .- sqrt.(V₀) .* (V₁ .^ I) + return x end -τ(fgm::FGMCopula) = (2*fgm.θ[1])/9 +τ(fgm::FGMCopula{2, Tθ, Tf}) where {Tθ,Tf} = (2*fgm.θ[1])/9 function τ⁻¹(::Type{FGMCopula}, τ) if !all(-2/9 <= τi <= 2/9 for τi in τ) throw(ArgumentError("For the FGM copula, tau must be in [-2/9, 2/9].")) end return max.(min.(9 * τ / 2, 1), -1) end -ρ(fgm::FGMCopula) = (1*fgm.θ)/3 # this is weird as it will return a vector ? +ρ(fgm::FGMCopula{2, Tθ, Tf}) where {Tθ,Tf} = fgm.θ[1]/3 function ρ⁻¹(::Type{FGMCopula}, ρ) if !all(-1/3 <= ρi <= 1/3 for ρi in ρ) throw(ArgumentError("For the FGM copula, rho must be in [-1/3, 1/3]."))