diff --git a/src/Distances.jl b/src/Distances.jl index 695792c..f45daae 100644 --- a/src/Distances.jl +++ b/src/Distances.jl @@ -58,7 +58,6 @@ export rogerstanimoto, chebyshev, minkowski, - mahalanobis, hamming, cosine_dist, diff --git a/src/common.jl b/src/common.jl index 7e63dae..f3cb6a1 100644 --- a/src/common.jl +++ b/src/common.jl @@ -64,7 +64,7 @@ function get_colwise_dims(d::Int, r::AbstractArray, a::AbstractVector, b::Abstra end function get_colwise_dims(d::Int, r::AbstractArray, a::AbstractMatrix, b::AbstractVector) - size(a, 1) == length(b) == d + size(a, 1) == length(b) == d || throw(DimensionMismatch("Incorrect vector dimensions.")) length(r) == size(a, 2) || throw(DimensionMismatch("Incorrect size of r.")) return size(a) @@ -109,10 +109,10 @@ function sumsq_percol(a::AbstractMatrix{T}) where {T} return r end -function wsumsq_percol(w::AbstractArray{T1}, a::AbstractMatrix{T2}) where {T1,T2} +function wsumsq_percol(w::AbstractArray{T1}, a::AbstractMatrix{T2}) where {T1, T2} m = size(a, 1) n = size(a, 2) - T = typeof(one(T1)*one(T2)) + T = typeof(one(T1) * one(T2)) r = Vector{T}(n) for j = 1:n aj = view(a, :, j) @@ -126,16 +126,16 @@ function wsumsq_percol(w::AbstractArray{T1}, a::AbstractMatrix{T2}) where {T1,T2 end function dot_percol!(r::AbstractArray, a::AbstractMatrix, b::AbstractMatrix) - m = size(a,1) - n = size(a,2) - size(b) == (m,n) && length(r) == n || + m = size(a, 1) + n = size(a, 2) + size(b) == (m, n) && length(r) == n || throw(DimensionMismatch("Inconsistent array dimensions.")) for j = 1:n - aj = view(a,:,j) - bj = view(b,:,j) + aj = view(a, :, j) + bj = view(b, :, j) r[j] = dot(aj, bj) end return r end -dot_percol(a::AbstractMatrix, b::AbstractMatrix) = dot_percol!(Vector{Float64}(size(a,2)), a, b) +dot_percol(a::AbstractMatrix, b::AbstractMatrix) = dot_percol!(Vector{Float64}(size(a, 2)), a, b) diff --git a/src/generic.jl b/src/generic.jl index d5ab477..20fbd66 100644 --- a/src/generic.jl +++ b/src/generic.jl @@ -32,7 +32,7 @@ result_type(::PreMetric, ::AbstractArray, ::AbstractArray) = Float64 function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractVector, b::AbstractMatrix) n = size(b, 2) length(r) == n || throw(DimensionMismatch("Incorrect size of r.")) - for j = 1 : n + for j = 1:n @inbounds r[j] = evaluate(metric, a, view(b, :, j)) end r @@ -41,7 +41,7 @@ end function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractMatrix, b::AbstractVector) n = size(a, 2) length(r) == n || throw(DimensionMismatch("Incorrect size of r.")) - for j = 1 : n + for j = 1:n @inbounds r[j] = evaluate(metric, view(a, :, j), b) end r @@ -50,7 +50,7 @@ end function colwise!(r::AbstractArray, metric::PreMetric, a::AbstractMatrix, b::AbstractMatrix) n = get_common_ncols(a, b) length(r) == n || throw(DimensionMismatch("Incorrect size of r.")) - for j = 1 : n + for j = 1:n @inbounds r[j] = evaluate(metric, view(a, :, j), view(b, :, j)) end r @@ -85,10 +85,10 @@ function pairwise!(r::AbstractMatrix, metric::PreMetric, a::AbstractMatrix, b::A na = size(a, 2) nb = size(b, 2) size(r) == (na, nb) || throw(DimensionMismatch("Incorrect size of r.")) - for j = 1 : size(b, 2) - bj = view(b,:,j) - for i = 1 : size(a, 2) - @inbounds r[i,j] = evaluate(metric, view(a,:,i), bj) + for j = 1:size(b, 2) + bj = view(b, :, j) + for i = 1:size(a, 2) + @inbounds r[i, j] = evaluate(metric, view(a, :, i), bj) end end r @@ -101,14 +101,14 @@ end function pairwise!(r::AbstractMatrix, metric::SemiMetric, a::AbstractMatrix) n = size(a, 2) size(r) == (n, n) || throw(DimensionMismatch("Incorrect size of r.")) - for j = 1 : n - aj = view(a,:,j) - for i = j+1 : n - @inbounds r[i,j] = evaluate(metric, view(a,:,i), aj) + for j = 1:n + aj = view(a, :, j) + for i = (j + 1):n + @inbounds r[i, j] = evaluate(metric, view(a, :, i), aj) end - @inbounds r[j,j] = 0 - for i = 1 : j-1 - @inbounds r[i,j] = r[j,i] # leveraging the symmetry of SemiMetric + @inbounds r[j, j] = 0 + for i = 1:(j - 1) + @inbounds r[i, j] = r[j, i] # leveraging the symmetry of SemiMetric end end r diff --git a/src/mahalanobis.jl b/src/mahalanobis.jl index 3284ffb..1080c01 100644 --- a/src/mahalanobis.jl +++ b/src/mahalanobis.jl @@ -13,7 +13,7 @@ result_type(::SqMahalanobis{T}, ::AbstractArray, ::AbstractArray) where {T} = T # SqMahalanobis -function evaluate(dist::SqMahalanobis{T}, a::AbstractVector, b::AbstractVector) where {T <: AbstractFloat} +function evaluate(dist::SqMahalanobis{T}, a::AbstractVector, b::AbstractVector) where {T <: Real} if length(a) != length(b) throw(DimensionMismatch("first array has length $(length(a)) which does not match the length of the second, $(length(b)).")) end @@ -25,14 +25,14 @@ end sqmahalanobis(a::AbstractVector, b::AbstractVector, Q::AbstractMatrix) = evaluate(SqMahalanobis(Q), a, b) -function colwise!(r::AbstractArray, dist::SqMahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: AbstractFloat} +function colwise!(r::AbstractArray, dist::SqMahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: Real} Q = dist.qmat m, n = get_colwise_dims(size(Q, 1), r, a, b) z = a - b dot_percol!(r, Q * z, z) end -function colwise!(r::AbstractArray, dist::SqMahalanobis{T}, a::AbstractVector, b::AbstractMatrix) where {T <: AbstractFloat} +function colwise!(r::AbstractArray, dist::SqMahalanobis{T}, a::AbstractVector, b::AbstractMatrix) where {T <: Real} Q = dist.qmat m, n = get_colwise_dims(size(Q, 1), r, a, b) z = a .- b @@ -40,7 +40,7 @@ function colwise!(r::AbstractArray, dist::SqMahalanobis{T}, a::AbstractVector, b dot_percol!(r, Q * z, z) end -function pairwise!(r::AbstractMatrix, dist::SqMahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: AbstractFloat} +function pairwise!(r::AbstractMatrix, dist::SqMahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: Real} Q = dist.qmat m, na, nb = get_pairwise_dims(size(Q, 1), r, a, b) @@ -50,15 +50,15 @@ function pairwise!(r::AbstractMatrix, dist::SqMahalanobis{T}, a::AbstractMatrix, sb2 = dot_percol(b, Qb) At_mul_B!(r, a, Qb) - for j = 1 : nb - @simd for i = 1 : na - @inbounds r[i,j] = sa2[i] + sb2[j] - 2 * r[i,j] + for j = 1:nb + @simd for i = 1:na + @inbounds r[i, j] = sa2[i] + sb2[j] - 2 * r[i, j] end end r end -function pairwise!(r::AbstractMatrix, dist::SqMahalanobis{T}, a::AbstractMatrix) where {T <: AbstractFloat} +function pairwise!(r::AbstractMatrix, dist::SqMahalanobis{T}, a::AbstractMatrix) where {T <: Real} Q = dist.qmat m, n = get_pairwise_dims(size(Q, 1), r, a) @@ -66,13 +66,13 @@ function pairwise!(r::AbstractMatrix, dist::SqMahalanobis{T}, a::AbstractMatrix) sa2 = dot_percol(a, Qa) At_mul_B!(r, a, Qa) - for j = 1 : n - for i = 1 : j-1 - @inbounds r[i,j] = r[j,i] + for j = 1:n + for i = 1:(j - 1) + @inbounds r[i, j] = r[j, i] end - r[j,j] = 0 - for i = j+1 : n - @inbounds r[i,j] = sa2[i] + sa2[j] - 2 * r[i,j] + r[j, j] = 0 + for i = (j + 1):n + @inbounds r[i, j] = sa2[i] + sa2[j] - 2 * r[i, j] end end r @@ -81,24 +81,24 @@ end # Mahalanobis -function evaluate(dist::Mahalanobis{T}, a::AbstractVector, b::AbstractVector) where {T <: AbstractFloat} +function evaluate(dist::Mahalanobis{T}, a::AbstractVector, b::AbstractVector) where {T <: Real} sqrt(evaluate(SqMahalanobis(dist.qmat), a, b)) end mahalanobis(a::AbstractVector, b::AbstractVector, Q::AbstractMatrix) = evaluate(Mahalanobis(Q), a, b) -function colwise!(r::AbstractArray, dist::Mahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: AbstractFloat} +function colwise!(r::AbstractArray, dist::Mahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: Real} sqrt!(colwise!(r, SqMahalanobis(dist.qmat), a, b)) end -function colwise!(r::AbstractArray, dist::Mahalanobis{T}, a::AbstractVector, b::AbstractMatrix) where {T <: AbstractFloat} +function colwise!(r::AbstractArray, dist::Mahalanobis{T}, a::AbstractVector, b::AbstractMatrix) where {T <: Real} sqrt!(colwise!(r, SqMahalanobis(dist.qmat), a, b)) end -function pairwise!(r::AbstractMatrix, dist::Mahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: AbstractFloat} +function pairwise!(r::AbstractMatrix, dist::Mahalanobis{T}, a::AbstractMatrix, b::AbstractMatrix) where {T <: Real} sqrt!(pairwise!(r, SqMahalanobis(dist.qmat), a, b)) end -function pairwise!(r::AbstractMatrix, dist::Mahalanobis{T}, a::AbstractMatrix) where {T <: AbstractFloat} +function pairwise!(r::AbstractMatrix, dist::Mahalanobis{T}, a::AbstractMatrix) where {T <: Real} sqrt!(pairwise!(r, SqMahalanobis(dist.qmat), a)) end diff --git a/src/metrics.jl b/src/metrics.jl index 6762ba2..3db4105 100644 --- a/src/metrics.jl +++ b/src/metrics.jl @@ -28,7 +28,6 @@ struct CorrDist <: SemiMetric end struct ChiSqDist <: SemiMetric end struct KLDivergence <: PreMetric end -struct KLDivergence <: PreMetric end struct GenKLDivergence <: PreMetric end """ @@ -99,7 +98,7 @@ struct RMSDeviation <: Metric end struct NormRMSDeviation <: Metric end -const UnionMetrics = Union{Euclidean, SqEuclidean, Chebyshev, Cityblock, Minkowski, Hamming, Jaccard, RogersTanimoto, CosineDist, CorrDist, ChiSqDist, KLDivergence, RenyiDivergence, JSDivergence, SpanNormDist, GenKLDivergence} +const UnionMetrics = Union{Euclidean,SqEuclidean,Chebyshev,Cityblock,Minkowski,Hamming,Jaccard,RogersTanimoto,CosineDist,CorrDist,ChiSqDist,KLDivergence,RenyiDivergence,JSDivergence,SpanNormDist,GenKLDivergence} """ Euclidean([thresh]) @@ -168,7 +167,7 @@ function evaluate(d::UnionMetrics, a::AbstractArray, b::AbstractArray) end return eval_end(d, s) end -result_type(dist::UnionMetrics, ::AbstractArray{T1}, ::AbstractArray{T2}) where {T1,T2} = +result_type(dist::UnionMetrics, ::AbstractArray{T1}, ::AbstractArray{T2}) where {T1, T2} = typeof(eval_end(dist, eval_op(dist, one(T1), one(T2)))) eval_start(d::UnionMetrics, a::AbstractArray, b::AbstractArray) = zero(result_type(d, a, b)) @@ -205,9 +204,9 @@ chebyshev(a::AbstractArray, b::AbstractArray) = evaluate(Chebyshev(), a, b) chebyshev(a::T, b::T) where {T <: Number} = evaluate(Chebyshev(), a, b) # Minkowski -@inline eval_op(dist::Minkowski, ai, bi) = abs(ai - bi) .^ dist.p +@inline eval_op(dist::Minkowski, ai, bi) = abs(ai - bi).^dist.p @inline eval_reduce(::Minkowski, s1, s2) = s1 + s2 -eval_end(dist::Minkowski, s) = s .^ (1/dist.p) +eval_end(dist::Minkowski, s) = s.^(1 / dist.p) minkowski(a::AbstractArray, b::AbstractArray, p::Real) = evaluate(Minkowski(p), a, b) minkowski(a::T, b::T, p::Real) where {T <: Number} = evaluate(Minkowski(p), a, b) @@ -255,15 +254,15 @@ kl_divergence(a::AbstractArray, b::AbstractArray) = evaluate(KLDivergence(), a, gkl_divergence(a::AbstractArray, b::AbstractArray) = evaluate(GenKLDivergence(), a, b) # RenyiDivergence -function eval_start(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T}) where {T <: AbstractFloat} - zero(T), zero(T), sum(a), sum(b) +function eval_start(::RenyiDivergence, a::AbstractArray{T}, b::AbstractArray{T}) where {T <: Real} + zero(T), zero(T), T(sum(a)), T(sum(b)) end -@inline function eval_op(dist::RenyiDivergence, ai::T, bi::T) where {T <: AbstractFloat} +@inline function eval_op(dist::RenyiDivergence, ai::T, bi::T) where {T <: Real} if ai == zero(T) return zero(T), zero(T), zero(T), zero(T) elseif dist.is_normal - return ai, ai * ((ai / bi) ^ dist.p), zero(T), zero(T) + return ai, ai * ((ai / bi)^dist.p), zero(T), zero(T) elseif dist.is_zero return ai, bi, zero(T), zero(T) elseif dist.is_one @@ -274,8 +273,8 @@ end end @inline function eval_reduce(dist::RenyiDivergence, - s1::Tuple{T, T, T, T}, - s2::Tuple{T, T, T, T}) where {T <: AbstractFloat} + s1::Tuple{T,T,T,T}, + s2::Tuple{T,T,T,T}) where {T <: Real} if dist.is_inf if s1[1] == zero(T) return (s2[1], s2[2], s1[3], s1[4]) @@ -289,7 +288,7 @@ end end end -function eval_end(dist::RenyiDivergence, s::Tuple{T, T, T, T}) where {T <: AbstractFloat} +function eval_end(dist::RenyiDivergence, s::Tuple{T,T,T,T}) where {T <: Real} if dist.is_zero || dist.is_normal log(s[2] / s[1]) / dist.p + log(s[4] / s[3]) elseif dist.is_one @@ -332,7 +331,7 @@ end eval_end(::SpanNormDist, s) = s[2] - s[1] spannorm_dist(a::AbstractArray, b::AbstractArray) = evaluate(SpanNormDist(), a, b) -function result_type(dist::SpanNormDist, ::AbstractArray{T1}, ::AbstractArray{T2}) where {T1,T2} +function result_type(dist::SpanNormDist, ::AbstractArray{T1}, ::AbstractArray{T2}) where {T1, T2} typeof(eval_op(dist, one(T1), one(T2))) end @@ -352,7 +351,7 @@ end a, b end @inline function eval_end(::Jaccard, a) - @inbounds v = 1 - (a[1]/a[2]) + @inbounds v = 1 - (a[1] / a[2]) return v end jaccard(a::AbstractArray, b::AbstractArray) = evaluate(Jaccard(), a, b) @@ -415,29 +414,29 @@ function pairwise!(r::AbstractMatrix, dist::SqEuclidean, a::AbstractMatrix, b::A threshT = convert(eltype(r), dist.thresh) if threshT <= 0 # If there's no chance of triggering the threshold, we can use @simd - for j = 1 : size(r,2) + for j = 1:size(r, 2) sb = sb2[j] - @simd for i = 1 : size(r,1) - @inbounds r[i,j] = sa2[i] + sb - 2 * r[i,j] + @simd for i = 1:size(r, 1) + @inbounds r[i, j] = sa2[i] + sb - 2 * r[i, j] end end else - for j = 1 : size(r,2) + for j = 1:size(r, 2) sb = sb2[j] - for i = 1 : size(r,1) + for i = 1:size(r, 1) @inbounds selfterms = sa2[i] + sb - @inbounds v = selfterms - 2*r[i,j] - if v < threshT*selfterms + @inbounds v = selfterms - 2 * r[i, j] + if v < threshT * selfterms # The distance is likely to be inaccurate, recalculate at higher prec. # This reflects the following: # ((x+ϵ) - y)^2 ≈ x^2 - 2xy + y^2 + O(ϵ) when |x-y| >> ϵ # ((x+ϵ) - y)^2 ≈ O(ϵ^2) otherwise v = zero(v) - for k = 1:size(a,1) - @inbounds v += (a[k,i]-b[k,j])^2 + for k = 1:size(a, 1) + @inbounds v += (a[k, i] - b[k, j])^2 end end - @inbounds r[i,j] = v + @inbounds r[i, j] = v end end end @@ -449,27 +448,27 @@ function pairwise!(r::AbstractMatrix, dist::SqEuclidean, a::AbstractMatrix) At_mul_B!(r, a, a) sa2 = sumsq_percol(a) threshT = convert(eltype(r), dist.thresh) - @inbounds for j = 1 : n - for i = 1 : j-1 - r[i,j] = r[j,i] + @inbounds for j = 1:n + for i = 1:(j - 1) + r[i, j] = r[j, i] end - r[j,j] = 0 + r[j, j] = 0 sa2j = sa2[j] if threshT <= 0 - @simd for i = j+1 : n - r[i,j] = sa2[i] + sa2j - 2 * r[i,j] + @simd for i = (j + 1):n + r[i, j] = sa2[i] + sa2j - 2 * r[i, j] end else - for i = j+1 : n + for i = (j + 1):n selfterms = sa2[i] + sa2j - v = selfterms - 2*r[i,j] - if v < threshT*selfterms + v = selfterms - 2 * r[i, j] + if v < threshT * selfterms v = zero(v) - for k = 1:size(a,1) - v += (a[k,i]-a[k,j])^2 + for k = 1:size(a, 1) + v += (a[k, i] - a[k, j])^2 end end - r[i,j] = v + r[i, j] = v end end end @@ -483,22 +482,22 @@ function pairwise!(r::AbstractMatrix, dist::Euclidean, a::AbstractMatrix, b::Abs sa2 = sumsq_percol(a) sb2 = sumsq_percol(b) threshT = convert(eltype(r), dist.thresh) - @inbounds for j = 1 : nb + @inbounds for j = 1:nb sb = sb2[j] - for i = 1 : na + for i = 1:na selfterms = sa2[i] + sb - v = selfterms - 2*r[i,j] - if v < threshT*selfterms + v = selfterms - 2 * r[i, j] + if v < threshT * selfterms # The distance is likely to be inaccurate, recalculate directly # This reflects the following: # while sqrt(x+ϵ) ≈ sqrt(x) + O(ϵ/sqrt(x)) when |x| >> ϵ, # sqrt(x+ϵ) ≈ O(sqrt(ϵ)) otherwise. v = zero(v) for k = 1:m - v += (a[k,i]-b[k,j])^2 + v += (a[k, i] - b[k, j])^2 end end - r[i,j] = sqrt(v) + r[i, j] = sqrt(v) end end r @@ -509,22 +508,22 @@ function pairwise!(r::AbstractMatrix, dist::Euclidean, a::AbstractMatrix) At_mul_B!(r, a, a) sa2 = sumsq_percol(a) threshT = convert(eltype(r), dist.thresh) - @inbounds for j = 1 : n - for i = 1 : j-1 - r[i,j] = r[j,i] + @inbounds for j = 1:n + for i = 1:(j - 1) + r[i, j] = r[j, i] end - r[j,j] = 0 + r[j, j] = 0 sa2j = sa2[j] - for i = j+1 : n + for i = (j + 1):n selfterms = sa2[i] + sa2j - v = selfterms - 2*r[i,j] - if v < threshT*selfterms + v = selfterms - 2 * r[i, j] + if v < threshT * selfterms v = zero(v) for k = 1:m - v += (a[k,i]-a[k,j])^2 + v += (a[k, i] - a[k, j])^2 end end - r[i,j] = sqrt(v) + r[i, j] = sqrt(v) end end r @@ -537,9 +536,9 @@ function pairwise!(r::AbstractMatrix, dist::CosineDist, a::AbstractMatrix, b::Ab At_mul_B!(r, a, b) ra = sqrt!(sumsq_percol(a)) rb = sqrt!(sumsq_percol(b)) - for j = 1 : nb - @simd for i = 1 : na - @inbounds r[i,j] = max(1 - r[i,j] / (ra[i] * rb[j]), 0) + for j = 1:nb + @simd for i = 1:na + @inbounds r[i, j] = max(1 - r[i, j] / (ra[i] * rb[j]), 0) end end r @@ -548,13 +547,13 @@ function pairwise!(r::AbstractMatrix, dist::CosineDist, a::AbstractMatrix) m, n = get_pairwise_dims(r, a) At_mul_B!(r, a, a) ra = sqrt!(sumsq_percol(a)) - @inbounds for j = 1 : n - @simd for i = j+1 : n - r[i,j] = max(1 - r[i,j] / (ra[i] * ra[j]), 0) + @inbounds for j = 1:n + @simd for i = j + 1:n + r[i, j] = max(1 - r[i, j] / (ra[i] * ra[j]), 0) end - r[j,j] = 0 - for i = 1 : j-1 - r[i,j] = r[j,i] + r[j, j] = 0 + for i = 1:(j - 1) + r[i, j] = r[j, i] end end r diff --git a/src/wmetrics.jl b/src/wmetrics.jl index b77daf0..dd0f33f 100644 --- a/src/wmetrics.jl +++ b/src/wmetrics.jl @@ -6,7 +6,7 @@ # Metric types # ########################################################### -const RealAbstractArray{T <: AbstractFloat} = AbstractArray{T} +const RealAbstractArray{T <: Real} = AbstractArray{T} struct WeightedEuclidean{W <: RealAbstractArray} <: Metric @@ -21,7 +21,7 @@ struct WeightedCityblock{W <: RealAbstractArray} <: Metric weights::W end -struct WeightedMinkowski{W <: RealAbstractArray, T <: Real} <: Metric +struct WeightedMinkowski{W <: RealAbstractArray,T <: Real} <: Metric weights::W p::T end @@ -31,7 +31,7 @@ struct WeightedHamming{W <: RealAbstractArray} <: Metric end -const UnionWeightedMetrics{W} = Union{WeightedEuclidean{W}, WeightedSqEuclidean{W}, WeightedCityblock{W}, WeightedMinkowski{W}, WeightedHamming{W}} +const UnionWeightedMetrics{W} = Union{WeightedEuclidean{W},WeightedSqEuclidean{W},WeightedCityblock{W},WeightedMinkowski{W},WeightedHamming{W}} Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) ########################################################### # @@ -42,7 +42,7 @@ Base.eltype(x::UnionWeightedMetrics) = eltype(x.weights) function evaluate(dist::UnionWeightedMetrics, a::T, b::T) where {T <: Number} eval_end(dist, eval_op(dist, a, b, one(eltype(dist)))) end -function result_type(dist::UnionWeightedMetrics, ::AbstractArray{T1}, ::AbstractArray{T2}) where {T1,T2} +function result_type(dist::UnionWeightedMetrics, ::AbstractArray{T1}, ::AbstractArray{T2}) where {T1, T2} typeof(evaluate(dist, one(T1), one(T2))) end function eval_start(d::UnionWeightedMetrics, a::AbstractArray, b::AbstractArray) @@ -98,9 +98,9 @@ weuclidean(a::AbstractArray, b::AbstractArray, w::AbstractArray) = evaluate(Weig wcityblock(a::AbstractArray, b::AbstractArray, w::AbstractArray) = evaluate(WeightedCityblock(w), a, b) # Minkowski -@inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi) .^ dist.p * wi +@inline eval_op(dist::WeightedMinkowski, ai, bi, wi) = abs(ai - bi).^dist.p * wi @inline eval_reduce(::WeightedMinkowski, s1, s2) = s1 + s2 -eval_end(dist::WeightedMinkowski, s) = s .^ (1/dist.p) +eval_end(dist::WeightedMinkowski, s) = s.^(1 / dist.p) wminkowski(a::AbstractArray, b::AbstractArray, w::AbstractArray, p::Real) = evaluate(WeightedMinkowski(w, p), a, b) # WeightedHamming @@ -122,9 +122,9 @@ function pairwise!(r::AbstractMatrix, dist::WeightedSqEuclidean, a::AbstractMatr sa2 = wsumsq_percol(w, a) sb2 = wsumsq_percol(w, b) At_mul_B!(r, a, b .* w) - for j = 1 : nb - @simd for i = 1 : na - @inbounds r[i,j] = sa2[i] + sb2[j] - 2 * r[i,j] + for j = 1:nb + @simd for i = 1:na + @inbounds r[i, j] = sa2[i] + sb2[j] - 2 * r[i, j] end end r @@ -136,13 +136,13 @@ function pairwise!(r::AbstractMatrix, dist::WeightedSqEuclidean, a::AbstractMatr sa2 = wsumsq_percol(w, a) At_mul_B!(r, a, a .* w) - for j = 1 : n - for i = 1 : j-1 - @inbounds r[i,j] = r[j,i] + for j = 1:n + for i = 1:(j - 1) + @inbounds r[i, j] = r[j, i] end - @inbounds r[j,j] = 0 - @simd for i = j+1 : n - @inbounds r[i,j] = sa2[i] + sa2[j] - 2 * r[i,j] + @inbounds r[j, j] = 0 + @simd for i = (j + 1):n + @inbounds r[i, j] = sa2[i] + sa2[j] - 2 * r[i, j] end end r diff --git a/test/F64.jl b/test/F64.jl new file mode 100644 index 0000000..6897cde --- /dev/null +++ b/test/F64.jl @@ -0,0 +1,43 @@ +# dummy type wrapping a Float64 used in tests +struct F64 <: Real + x::Float64 +end + +# operations +for op in (:+, :-) + @eval Base.$op(a::F64) = F64($op(a.x)) +end +for op in (:+, :-, :*, :/) + @eval Base.$op(a::F64, b::F64) = F64($op(a.x, b.x)) +end +for op in (:zero, :one) + @eval Base.$op(::Type{F64}) = F64($op(Float64)) +end +Base.rand(rng::AbstractRNG, ::Type{F64}) = F64(rand()) +Base.sqrt(a::F64) = F64(sqrt(a.x)) +Base.:^(a::F64, b::Number) = F64(a.x^b) +Base.:^(a::F64, b::Int) = F64(a.x^b) +Base.:^(a::F64, b::F64) = F64(a.x^b.x) +Base.:^(a::Number, b::F64) = a^b.x +Base.log(a::F64) = F64(log(a.x)) +Base.isfinite(a::F64) = isfinite(a.x) +Base.float(a::F64) = a +Base.rtoldefault(a::Type{F64}, b::Type{F64}) = Base.rtoldefault(Float64, Float64) +# comparison +Base.isapprox(a::F64, b::F64) = isapprox(a.x, b.x) +Base.:<(a::F64, b::F64) = a.x < b.x +Base.:<=(a::F64, b::F64) = a.x <= b.x +Base.eps(::Type{F64}) = eps(Float64) + +# promotion +Base.promote_type(::Type{Float32}, ::Type{F64}) = Float64 # for eig +Base.promote_type(::Type{Float64}, ::Type{F64}) = Float64 # for vecnorm +Base.promote(a::F64, b::T) where {T <: Number} = a, F64(b) +Base.promote(a::T, b::F64) where {T <: Number} = F64(a), b +Base.convert(::Type{F64}, a::F64) = a +Base.convert(::Type{Float64}, a::F64) = a.x +Base.convert(::Type{F64}, a::T) where {T <: Number} = F64(a) + +# conversion +Base.Int64(a::F64) = Int64(a.x) +Base.Int32(a::F64) = Int32(a.x) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index a86c786..96118f8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ using Distances using Base.Test +include("F64.jl") include("test_dists.jl") diff --git a/test/test_dists.jl b/test/test_dists.jl index 1533261..8e13fb4 100644 --- a/test/test_dists.jl +++ b/test/test_dists.jl @@ -36,71 +36,73 @@ function test_metricity(dist, x, y, z) end @testset "PreMetric, SemiMetric, Metric" begin - n = 10 - x = rand(n) - y = rand(n) - z = rand(n) - - test_metricity(SqEuclidean(), x, y, z) - test_metricity(Euclidean(), x, y, z) - test_metricity(Cityblock(), x, y, z) - test_metricity(Chebyshev(), x, y, z) - test_metricity(Minkowski(2.5), x, y, z) - - test_metricity(CosineDist(), x, y, z) - test_metricity(CorrDist(), x, y, z) - - test_metricity(ChiSqDist(), x, y, z) - - test_metricity(Jaccard(), x, y, z) - test_metricity(SpanNormDist(), x, y, z) - - test_metricity(BhattacharyyaDist(), x, y, z) - test_metricity(HellingerDist(), x, y, z) - - k = rand(1:3, n) - l = rand(1:3, n) - m = rand(1:3, n) - - test_metricity(Hamming(), k, l, m) - - a = rand(Bool, n) - b = rand(Bool, n) - c = rand(Bool, n) - - test_metricity(RogersTanimoto(), a, b, c) - test_metricity(Jaccard(), a, b, c) - - w = rand(n) - - test_metricity(WeightedSqEuclidean(w), x, y, z) - test_metricity(WeightedEuclidean(w), x, y, z) - test_metricity(WeightedCityblock(w), x, y, z) - test_metricity(WeightedMinkowski(w, 2.5), x, y, z) - test_metricity(WeightedHamming(w), a, b, c) - - Q = rand(n, n) - Q = Q * Q' # make sure Q is positive-definite - - test_metricity(SqMahalanobis(Q), x, y, z) - test_metricity(Mahalanobis(Q), x, y, z) - - p = rand(n) - q = rand(n) - r = rand(n) - p[p .< median(p)] = 0 - p /= sum(p) - q /= sum(q) - r /= sum(r) - - test_metricity(KLDivergence(), p, q, r) - test_metricity(RenyiDivergence(0.0), p, q, r) - test_metricity(RenyiDivergence(1.0), p, q, r) - test_metricity(RenyiDivergence(Inf), p, q, r) - test_metricity(RenyiDivergence(0.5), p, q, r) - test_metricity(RenyiDivergence(2), p, q, r) - test_metricity(RenyiDivergence(10), p, q, r) - test_metricity(JSDivergence(), p, q, r) + for T in (Float64, F64) + n = 10 + x = rand(T, n) + y = rand(T, n) + z = rand(T, n) + + test_metricity(SqEuclidean(), x, y, z) + test_metricity(Euclidean(), x, y, z) + test_metricity(Cityblock(), x, y, z) + test_metricity(Chebyshev(), x, y, z) + test_metricity(Minkowski(2.5), x, y, z) + + test_metricity(CosineDist(), x, y, z) + test_metricity(CorrDist(), x, y, z) + + test_metricity(ChiSqDist(), x, y, z) + + test_metricity(Jaccard(), x, y, z) + test_metricity(SpanNormDist(), x, y, z) + + test_metricity(BhattacharyyaDist(), x, y, z) + test_metricity(HellingerDist(), x, y, z) + + k = rand(1:3, n) + l = rand(1:3, n) + m = rand(1:3, n) + + test_metricity(Hamming(), k, l, m) + + a = rand(Bool, n) + b = rand(Bool, n) + c = rand(Bool, n) + + test_metricity(RogersTanimoto(), a, b, c) + test_metricity(Jaccard(), a, b, c) + + w = rand(T, n) + + test_metricity(WeightedSqEuclidean(w), x, y, z) + test_metricity(WeightedEuclidean(w), x, y, z) + test_metricity(WeightedCityblock(w), x, y, z) + test_metricity(WeightedMinkowski(w, 2.5), x, y, z) + test_metricity(WeightedHamming(w), a, b, c) + + Q = rand(T, n, n) + Q = Q * Q' # make sure Q is positive-definite + + test_metricity(SqMahalanobis(Q), x, y, z) + test_metricity(Mahalanobis(Q), x, y, z) + + p = rand(T, n) + q = rand(T, n) + r = rand(T, n) + p[p .< median(p)] = 0 + p /= sum(p) + q /= sum(q) + r /= sum(r) + + test_metricity(KLDivergence(), p, q, r) + test_metricity(RenyiDivergence(0.0), p, q, r) + test_metricity(RenyiDivergence(1.0), p, q, r) + test_metricity(RenyiDivergence(Inf), p, q, r) + test_metricity(RenyiDivergence(0.5), p, q, r) + test_metricity(RenyiDivergence(2), p, q, r) + test_metricity(RenyiDivergence(10), p, q, r) + test_metricity(JSDivergence(), p, q, r) + end end @testset "individual metrics" begin @@ -116,113 +118,122 @@ end bt = [true, false, true] bf = [false, true, true] - @test rogerstanimoto(bt, bf) == 4.0/5.0 - - for (x, y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]), - ([4.0, 5.0, 6.0, 7.0], [3. 8.; 9. 1.0])) - @test sqeuclidean(x, y) == 57.0 - @test euclidean(x, y) == sqrt(57.0) - @test jaccard(x, y) == 13.0/28 - @test cityblock(x, y) == 13.0 - @test chebyshev(x, y) == 6.0 - @test minkowski(x, y, 2) == sqrt(57.0) - @test_throws DimensionMismatch cosine_dist(1.0:2, 1.0:3) - @test cosine_dist(x, y) ≈ (1.0 - 112. / sqrt(19530.0)) - x_int, y_int = map(Int64, x), map(Int64, y) - @test cosine_dist(x_int, y_int) == (1.0 - 112.0 / sqrt(19530.0)) - @test corr_dist(x, y) ≈ cosine_dist(x .- mean(x), vec(y) .- mean(y)) - @test chisq_dist(x, y) == sum((x - vec(y)).^2 ./ (x + vec(y))) - @test spannorm_dist(x, y) == maximum(x - vec(y)) - minimum(x - vec(y)) - - @test gkl_divergence(x, y) ≈ sum(i -> x[i] * log(x[i] / y[i]) - x[i] + y[i], 1:length(x)) - - @test meanad(x, y) ≈ mean(Float64[abs(x[i] - y[i]) for i in 1:length(x)]) - @test msd(x, y) ≈ mean(Float64[abs2(x[i] - y[i]) for i in 1:length(x)]) - @test rmsd(x, y) ≈ sqrt(msd(x, y)) - @test nrmsd(x, y) ≈ sqrt(msd(x, y)) / (maximum(x) - minimum(x)) - - w = ones(4) - @test sqeuclidean(x, y) ≈ wsqeuclidean(x, y, w) - - w = rand(size(x)) - @test wsqeuclidean(x, y, w) ≈ dot((x - vec(y)).^2, w) - @test weuclidean(x, y, w) == sqrt(wsqeuclidean(x, y, w)) - @test wcityblock(x, y, w) ≈ dot(abs.(x - vec(y)), w) - @test wminkowski(x, y, w, 2) ≈ weuclidean(x, y, w) - end - - # Test weighted Hamming distances with even weights - a = [1.0, 2.0, 1.0, 3.0, 2.0, 1.0] - b = [1.0, 3.0, 0.0, 2.0, 2.0, 0.0] - w = rand(size(a)) - - @test whamming(a, a, w) == 0.0 - @test whamming(a, b, w) == sum((a .!= b) .* w) - - # Minimal test of Jaccard - test return type stability. - @inferred evaluate(Jaccard(), rand(3), rand(3)) - @inferred evaluate(Jaccard(), [1,2,3], [1,2,3]) - @inferred evaluate(Jaccard(), [true, false, true], [false, true, true]) - - # Test KL, Renyi and JS divergences - p = r = rand(12) - p[p .< median(p)] = 0.0 - scale = sum(p) / sum(r) - r /= sum(r) - p /= sum(p) - q = rand(12) - q /= sum(q) - - klv = 0.0 - for i = 1 : length(p) - if p[i] > 0 - klv += p[i] * log(p[i] / q[i]) + @test rogerstanimoto(bt, bf) == 4.0 / 5.0 + for T in (Float64, F64) + + for (_x, _y) in (([4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0]), + ([4.0, 5.0, 6.0, 7.0], [3. 8.; 9. 1.0])) + x, y = T.(_x), T.(_y) + @test sqeuclidean(x, y) == 57.0 + @test euclidean(x, y) == sqrt(57.0) + @test jaccard(x, y) == 13.0 / 28 + @test cityblock(x, y) == 13.0 + @test chebyshev(x, y) == 6.0 + @test minkowski(x, y, 2) == sqrt(57.0) + @test_throws DimensionMismatch cosine_dist(1.0:2, 1.0:3) + @test cosine_dist(x, y) ≈ (1.0 - 112. / sqrt(19530.0)) + x_int, y_int = Int64.(x), Int64.(y) + @test cosine_dist(x_int, y_int) == (1.0 - 112.0 / sqrt(19530.0)) + @test corr_dist(x, y) ≈ cosine_dist(x .- mean(x), vec(y) .- mean(y)) + @test chisq_dist(x, y) == sum((x - vec(y)).^2 ./ (x + vec(y))) + @test spannorm_dist(x, y) == maximum(x - vec(y)) - minimum(x - vec(y)) + + @test gkl_divergence(x, y) ≈ sum(i -> x[i] * log(x[i] / y[i]) - x[i] + y[i], 1:length(x)) + + @test meanad(x, y) ≈ mean(Float64[abs(x[i] - y[i]) for i in 1:length(x)]) + @test msd(x, y) ≈ mean(Float64[abs2(x[i] - y[i]) for i in 1:length(x)]) + @test rmsd(x, y) ≈ sqrt(msd(x, y)) + @test nrmsd(x, y) ≈ sqrt(msd(x, y)) / (maximum(x) - minimum(x)) + + w = ones(4) + @test sqeuclidean(x, y) ≈ wsqeuclidean(x, y, w) + + w = rand(size(x)) + @test wsqeuclidean(x, y, w) ≈ dot((x - vec(y)).^2, w) + @test weuclidean(x, y, w) == sqrt(wsqeuclidean(x, y, w)) + @test wcityblock(x, y, w) ≈ dot(abs.(x - vec(y)), w) + @test wminkowski(x, y, w, 2) ≈ weuclidean(x, y, w) + end + + + # Test weighted Hamming distances with even weights + a = T.([1.0, 2.0, 1.0, 3.0, 2.0, 1.0]) + b = T.([1.0, 3.0, 0.0, 2.0, 2.0, 0.0]) + w = rand(T, size(a)) + + @test whamming(a, a, w) === T(0.0) + @test whamming(a, b, w) === sum((a .!= b) .* w) + + # Minimal test of Jaccard - test return type stability. + @inferred evaluate(Jaccard(), rand(T, 3), rand(T, 3)) + @inferred evaluate(Jaccard(), [1, 2, 3], [1, 2, 3]) + @inferred evaluate(Jaccard(), [true, false, true], [false, true, true]) + + # Test KL, Renyi and JS divergences + r = rand(T, 12) + p = copy(r) + p[p .< median(p)] = 0.0 + scale = sum(p) / sum(r) + r /= sum(r) + p /= sum(p) + q = rand(T, 12) + q /= sum(q) + + klv = 0.0 + for i = 1:length(p) + if p[i] > 0 + klv += p[i] * log(p[i] / q[i]) + end end + @test kl_divergence(p, q) ≈ klv + @test typeof(kl_divergence(p, q)) == T + + + @test renyi_divergence(p, r, 0) ≈ -log(scale) + @test renyi_divergence(p, r, 1) ≈ -log(scale) + @test renyi_divergence(p, r, 10) ≈ -log(scale) + @test renyi_divergence(p, r, rand()) ≈ -log(scale) + @test renyi_divergence(p, r, Inf) ≈ -log(scale) + @test isinf(renyi_divergence([0.0, 0.5, 0.5], [0.0, 1.0, 0.0], Inf)) + @test renyi_divergence([0.0, 1.0, 0.0], [0.0, 0.5, 0.5], Inf) ≈ log(2.0) + @test renyi_divergence(p, q, 1) ≈ kl_divergence(p, q) + + pm = (p + q) / 2 + jsv = kl_divergence(p, pm) / 2 + kl_divergence(q, pm) / 2 + @test js_divergence(p, q) ≈ jsv end - @test kl_divergence(p, q) ≈ klv - - @test renyi_divergence(p, r, 0) ≈ -log(scale) - @test renyi_divergence(p, r, 1) ≈ -log(scale) - @test renyi_divergence(p, r, 10) ≈ -log(scale) - @test renyi_divergence(p, r, rand()) ≈ -log(scale) - @test renyi_divergence(p, r, Inf) ≈ -log(scale) - @test isinf(renyi_divergence([0.0, 0.5, 0.5], [0.0, 1.0, 0.0], Inf)) - @test renyi_divergence([0.0, 1.0, 0.0], [0.0, 0.5, 0.5], Inf) ≈ log(2.0) - @test renyi_divergence(p, q, 1) ≈ kl_divergence(p, q) - - pm = (p + q) / 2 - jsv = kl_divergence(p, pm) / 2 + kl_divergence(q, pm) / 2 - @test js_divergence(p, q) ≈ jsv end # testset @testset "NaN behavior" begin a = [NaN, 0]; b = [0, NaN] - @test isnan(chebyshev(a, b)) == isnan(maximum(a-b)) + @test isnan(chebyshev(a, b)) == isnan(maximum(a - b)) a = [NaN, 0]; b = [0, 1] - @test isnan(chebyshev(a, b)) == isnan(maximum(a-b)) + @test isnan(chebyshev(a, b)) == isnan(maximum(a - b)) @test isnan(renyi_divergence([0.5, 0.0, 0.5], [0.5, 0.5, NaN], 2)) end #testset @testset "empty vector" begin - a = Float64[] - b = Float64[] - @test sqeuclidean(a, b) == 0.0 - @test isa(sqeuclidean(a, b), Float64) - @test euclidean(a, b) == 0.0 - @test isa(euclidean(a, b), Float64) - @test cityblock(a, b) == 0.0 - @test isa(cityblock(a, b), Float64) - @test chebyshev(a, b) == 0.0 - @test isa(chebyshev(a, b), Float64) - @test minkowski(a, b, 2) == 0.0 - @test isa(minkowski(a, b, 2), Float64) - @test hamming(a, b) == 0.0 - @test isa(hamming(a, b), Int) - @test renyi_divergence(a, b, 1.0) == 0.0 - @test isa(renyi_divergence(a, b, 2.0), Float64) - - w = Float64[] - @test isa(whamming(a, b, w), Float64) + for T in (Float64, F64) + a = T[] + b = T[] + @test sqeuclidean(a, b) == 0.0 + @test isa(sqeuclidean(a, b), T) + @test euclidean(a, b) == 0.0 + @test isa(euclidean(a, b), T) + @test cityblock(a, b) == 0.0 + @test isa(cityblock(a, b), T) + @test chebyshev(a, b) == 0.0 + @test isa(chebyshev(a, b), T) + @test minkowski(a, b, 2) == 0.0 + @test isa(minkowski(a, b, 2), T) + @test hamming(a, b) == 0.0 + @test isa(hamming(a, b), Int) + @test renyi_divergence(a, b, 1.0) == 0.0 + @test isa(renyi_divergence(a, b, 2.0), T) + + w = T[] + @test isa(whamming(a, b, w), T) + end end # testset @testset "DimensionMismatch throwing" begin @@ -247,137 +258,142 @@ end # testset end # testset @testset "mahalanobis" begin - x, y = [4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0] - a = [1.0, 2.0, 1.0, 3.0, 2.0, 1.0] - b = [1.0, 3.0, 0.0, 2.0, 2.0, 0.0] - - Q = rand(length(x), length(x)) - Q = Q * Q' # make sure Q is positive-definite - @test sqmahalanobis(x, y, Q) ≈ dot(x - y, Q * (x - y)) - - @test mahalanobis(x, y, Q) == sqrt(sqmahalanobis(x, y, Q)) + for T in (Float64, F64) + x, y = T.([4.0, 5.0, 6.0, 7.0]), T.([3.0, 9.0, 8.0, 1.0]) + a = T.([1.0, 2.0, 1.0, 3.0, 2.0, 1.0]) + b = T.([1.0, 3.0, 0.0, 2.0, 2.0, 0.0]) + + Q = rand(T, length(x), length(x)) + Q = Q * Q' # make sure Q is positive-definite + @test sqmahalanobis(x, y, Q) ≈ dot(x - y, Q * (x - y)) + @test eltype(sqmahalanobis(x, y, Q)) == T + @test mahalanobis(x, y, Q) == sqrt(sqmahalanobis(x, y, Q)) + @test eltype(mahalanobis(x, y, Q)) == T + end end #testset @testset "bhattacharyya / hellinger" begin - x, y = [4.0, 5.0, 6.0, 7.0], [3.0, 9.0, 8.0, 1.0] - a = [1.0, 2.0, 1.0, 3.0, 2.0, 1.0] - b = [1.0, 3.0, 0.0, 2.0, 2.0, 0.0] - p = rand(12) - p[p .< median(p)] = 0.0 - q = rand(12) - - # Bhattacharyya and Hellinger distances are defined for discrete - # probability distributions so to calculate the expected values - # we need to normalize vectors. - px = x ./ sum(x) - py = y ./ sum(y) - expected_bc_x_y = sum(sqrt.(px .* py)) - @test Distances.bhattacharyya_coeff(x, y) ≈ expected_bc_x_y - @test bhattacharyya(x, y) ≈ (-log(expected_bc_x_y)) - @test hellinger(x, y) ≈ sqrt(1 - expected_bc_x_y) - - pa = a ./ sum(a) - pb = b ./ sum(b) - expected_bc_a_b = sum(sqrt.(pa .* pb)) - @test Distances.bhattacharyya_coeff(a, b) ≈ expected_bc_a_b - @test bhattacharyya(a, b) ≈ (-log(expected_bc_a_b)) - @test hellinger(a, b) ≈ sqrt(1 - expected_bc_a_b) - - pp = p ./ sum(p) - pq = q ./ sum(q) - expected_bc_p_q = sum(sqrt.(pp .* pq)) - @test Distances.bhattacharyya_coeff(p, q) ≈ expected_bc_p_q - @test bhattacharyya(p, q) ≈ (-log(expected_bc_p_q)) - @test hellinger(p, q) ≈ sqrt(1 - expected_bc_p_q) - - # Ensure it is semimetric - @test bhattacharyya(x, y) ≈ bhattacharyya(y, x) - + for T in (Float64, F64) + x, y = T.([4.0, 5.0, 6.0, 7.0]), T.([3.0, 9.0, 8.0, 1.0]) + a = T.([1.0, 2.0, 1.0, 3.0, 2.0, 1.0]) + b = T.([1.0, 3.0, 0.0, 2.0, 2.0, 0.0]) + p = rand(T, 12) + p[p .< median(p)] = 0.0 + q = rand(T, 12) + + # Bhattacharyya and Hellinger distances are defined for discrete + # probability distributions so to calculate the expected values + # we need to normalize vectors. + px = x ./ sum(x) + py = y ./ sum(y) + expected_bc_x_y = sum(sqrt.(px .* py)) + @test Distances.bhattacharyya_coeff(x, y) ≈ expected_bc_x_y + @test bhattacharyya(x, y) ≈ (-log(expected_bc_x_y)) + @test hellinger(x, y) ≈ sqrt(1 - expected_bc_x_y) + + pa = a ./ sum(a) + pb = b ./ sum(b) + expected_bc_a_b = sum(sqrt.(pa .* pb)) + @test Distances.bhattacharyya_coeff(a, b) ≈ expected_bc_a_b + @test bhattacharyya(a, b) ≈ (-log(expected_bc_a_b)) + @test hellinger(a, b) ≈ sqrt(1 - expected_bc_a_b) + + pp = p ./ sum(p) + pq = q ./ sum(q) + expected_bc_p_q = sum(sqrt.(pp .* pq)) + @test Distances.bhattacharyya_coeff(p, q) ≈ expected_bc_p_q + @test bhattacharyya(p, q) ≈ (-log(expected_bc_p_q)) + @test hellinger(p, q) ≈ sqrt(1 - expected_bc_p_q) + + # Ensure it is semimetric + @test bhattacharyya(x, y) ≈ bhattacharyya(y, x) + end end #testset -function test_colwise(dist, x, y) +function test_colwise(dist, x, y, T) n = size(x, 2) - r1 = zeros(n) - r2 = zeros(n) - r3 = zeros(n) - for j = 1 : n - r1[j] = evaluate(dist, x[:,j], y[:,j]) - r2[j] = evaluate(dist, x[:,1], y[:,j]) - r3[j] = evaluate(dist, x[:,j], y[:,1]) + r1 = zeros(T, n) + r2 = zeros(T, n) + r3 = zeros(T, n) + for j = 1:n + r1[j] = evaluate(dist, x[:, j], y[:, j]) + r2[j] = evaluate(dist, x[:, 1], y[:, j]) + r3[j] = evaluate(dist, x[:, j], y[:, 1]) end @test colwise(dist, x, y) ≈ r1 - @test colwise(dist, x[:,1], y) ≈ r2 - @test colwise(dist, x, y[:,1]) ≈ r3 + @test colwise(dist, x[:, 1], y) ≈ r2 + @test colwise(dist, x, y[:, 1]) ≈ r3 end @testset "column-wise metrics" begin - m = 5 - n = 8 - X = rand(m, n) - Y = rand(m, n) - A = rand(1:3, m, n) - B = rand(1:3, m, n) - - P = rand(m, n) - Q = rand(m, n) - # Make sure not to remove all of the non-zeros from any column - for i in 1:n - P[P[:, i] .< median(P[:, i])/2, i] = 0.0 - end - - test_colwise(SqEuclidean(), X, Y) - test_colwise(Euclidean(), X, Y) - test_colwise(Cityblock(), X, Y) - test_colwise(Chebyshev(), X, Y) - test_colwise(Minkowski(2.5), X, Y) - test_colwise(Hamming(), A, B) - - test_colwise(CosineDist(), X, Y) - test_colwise(CorrDist(), X, Y) - - test_colwise(ChiSqDist(), X, Y) - test_colwise(KLDivergence(), P, Q) - test_colwise(RenyiDivergence(0.0), P, Q) - test_colwise(RenyiDivergence(1.0), P, Q) - test_colwise(RenyiDivergence(Inf), P, Q) - test_colwise(RenyiDivergence(0.5), P, Q) - test_colwise(RenyiDivergence(2), P, Q) - test_colwise(RenyiDivergence(10), P, Q) - test_colwise(JSDivergence(), P, Q) - test_colwise(SpanNormDist(), X, Y) - - test_colwise(BhattacharyyaDist(), X, Y) - test_colwise(HellingerDist(), X, Y) - - w = rand(m) - - test_colwise(WeightedSqEuclidean(w), X, Y) - test_colwise(WeightedEuclidean(w), X, Y) - test_colwise(WeightedCityblock(w), X, Y) - test_colwise(WeightedMinkowski(w, 2.5), X, Y) - test_colwise(WeightedHamming(w), A, B) - - Q = rand(m, m) - Q = Q * Q' # make sure Q is positive-definite - - test_colwise(SqMahalanobis(Q), X, Y) - test_colwise(Mahalanobis(Q), X, Y) + for T in (Float64, F64) + m = 5 + n = 8 + X = rand(T, m, n) + Y = rand(T, m, n) + A = rand(1:3, m, n) + B = rand(1:3, m, n) + + P = rand(T, m, n) + Q = rand(T, m, n) + # Make sure not to remove all of the non-zeros from any column + for i in 1:n + P[P[:, i] .< median(P[:, i]) / 2, i] = 0.0 + end + test_colwise(SqEuclidean(), X, Y, T) + test_colwise(Euclidean(), X, Y, T) + test_colwise(Cityblock(), X, Y, T) + test_colwise(Chebyshev(), X, Y, T) + test_colwise(Minkowski(2.5), X, Y, T) + test_colwise(Hamming(), A, B, T) + + test_colwise(CosineDist(), X, Y, T) + test_colwise(CorrDist(), X, Y, T) + + test_colwise(ChiSqDist(), X, Y, T) + test_colwise(KLDivergence(), P, Q, T) + test_colwise(RenyiDivergence(0.0), P, Q, T) + test_colwise(RenyiDivergence(1.0), P, Q, T) + test_colwise(RenyiDivergence(Inf), P, Q, T) + test_colwise(RenyiDivergence(0.5), P, Q, T) + test_colwise(RenyiDivergence(2), P, Q, T) + test_colwise(RenyiDivergence(10), P, Q, T) + test_colwise(JSDivergence(), P, Q, T) + test_colwise(SpanNormDist(), X, Y, T) + + test_colwise(BhattacharyyaDist(), X, Y, T) + test_colwise(HellingerDist(), X, Y, T) + + w = rand(T, m) + + test_colwise(WeightedSqEuclidean(w), X, Y, T) + test_colwise(WeightedEuclidean(w), X, Y, T) + test_colwise(WeightedCityblock(w), X, Y, T) + test_colwise(WeightedMinkowski(w, 2.5), X, Y, T) + test_colwise(WeightedHamming(w), A, B, T) + + Q = rand(T, m, m) + Q = Q * Q' # make sure Q is positive-definite + + test_colwise(SqMahalanobis(Q), X, Y, T) + test_colwise(Mahalanobis(Q), X, Y, T) + end end # testset -function test_pairwise(dist, x, y) +function test_pairwise(dist, x, y, T) quote nx = size(x, 2) ny = size(y, 2) - rxy = zeros(nx, ny) - rxx = zeros(nx, nx) - for j = 1 : ny, i = 1 : nx - rxy[i, j] = evaluate(dist, x[:,i], y[:,j]) + rxy = zeros(T, nx, ny) + rxx = zeros(T, nx, nx) + for j = 1:ny, i = 1:nx + rxy[i, j] = evaluate(dist, x[:, i], y[:, j]) end - for j = 1 : nx, i = 1 : nx - rxx[i, j] = evaluate(dist, x[:,i], x[:,j]) + for j = 1:nx, i = 1:nx + rxx[i, j] = evaluate(dist, x[:, i], x[:, j]) end @test pairwise(dist, x, y) ≈ rxy @test pairwise(dist, x) ≈ rxx @@ -385,71 +401,72 @@ function test_pairwise(dist, x, y) end @testset "pairwise metrics" begin - - m = 5 - n = 8 - nx = 6 - ny = 8 - - X = rand(m, nx) - Y = rand(m, ny) - A = rand(1:3, m, nx) - B = rand(1:3, m, ny) - - P = rand(m, nx) - Q = rand(m, ny) - - - test_pairwise(SqEuclidean(), X, Y) - test_pairwise(Euclidean(), X, Y) - test_pairwise(Cityblock(), X, Y) - test_pairwise(Chebyshev(), X, Y) - test_pairwise(Minkowski(2.5), X, Y) - test_pairwise(Hamming(), A, B) - - test_pairwise(CosineDist(), X, Y) - test_pairwise(CorrDist(), X, Y) - - test_pairwise(ChiSqDist(), X, Y) - test_pairwise(KLDivergence(), P, Q) - test_pairwise(RenyiDivergence(0.0), P, Q) - test_pairwise(RenyiDivergence(1.0), P, Q) - test_pairwise(RenyiDivergence(Inf), P, Q) - test_pairwise(RenyiDivergence(0.5), P, Q) - test_pairwise(RenyiDivergence(2), P, Q) - test_pairwise(JSDivergence(), P, Q) - - test_pairwise(BhattacharyyaDist(), X, Y) - test_pairwise(HellingerDist(), X, Y) - - w = rand(m) - - test_pairwise(WeightedSqEuclidean(w), X, Y) - test_pairwise(WeightedEuclidean(w), X, Y) - test_pairwise(WeightedCityblock(w), X, Y) - test_pairwise(WeightedMinkowski(w, 2.5), X, Y) - test_pairwise(WeightedHamming(w), A, B) - - Q = rand(m, m) - Q = Q * Q' # make sure Q is positive-definite - - test_pairwise(SqMahalanobis(Q), X, Y) - test_pairwise(Mahalanobis(Q), X, Y) + for T in (Float64, F64) + m = 5 + n = 8 + nx = 6 + ny = 8 + + X = rand(T, m, nx) + Y = rand(T, m, ny) + A = rand(1:3, m, nx) + B = rand(1:3, m, ny) + + P = rand(T, m, nx) + Q = rand(T, m, ny) + + + test_pairwise(SqEuclidean(), X, Y, T) + test_pairwise(Euclidean(), X, Y, T) + test_pairwise(Cityblock(), X, Y, T) + test_pairwise(Chebyshev(), X, Y, T) + test_pairwise(Minkowski(2.5), X, Y, T) + test_pairwise(Hamming(), A, B, T) + + test_pairwise(CosineDist(), X, Y, T) + test_pairwise(CorrDist(), X, Y, T) + + test_pairwise(ChiSqDist(), X, Y, T) + test_pairwise(KLDivergence(), P, Q, T) + test_pairwise(RenyiDivergence(0.0), P, Q, T) + test_pairwise(RenyiDivergence(1.0), P, Q, T) + test_pairwise(RenyiDivergence(Inf), P, Q, T) + test_pairwise(RenyiDivergence(0.5), P, Q, T) + test_pairwise(RenyiDivergence(2), P, Q, T) + test_pairwise(JSDivergence(), P, Q, T) + + test_pairwise(BhattacharyyaDist(), X, Y, T) + test_pairwise(HellingerDist(), X, Y, T) + + w = rand(m) + + test_pairwise(WeightedSqEuclidean(w), X, Y, T) + test_pairwise(WeightedEuclidean(w), X, Y, T) + test_pairwise(WeightedCityblock(w), X, Y, T) + test_pairwise(WeightedMinkowski(w, 2.5), X, Y, T) + test_pairwise(WeightedHamming(w), A, B, T) + + Q = rand(m, m) + Q = Q * Q' # make sure Q is positive-definite + + test_pairwise(SqMahalanobis(Q), X, Y, T) + test_pairwise(Mahalanobis(Q), X, Y, T) + end end #testset @testset "Euclidean precision" begin X = [0.1 0.2; 0.3 0.4; -0.1 -0.1] pd = pairwise(Euclidean(1e-12), X, X) - @test pd[1,1] == 0 - @test pd[2,2] == 0 + @test pd[1, 1] == 0 + @test pd[2, 2] == 0 pd = pairwise(Euclidean(1e-12), X) - @test pd[1,1] == 0 - @test pd[2,2] == 0 + @test pd[1, 1] == 0 + @test pd[2, 2] == 0 pd = pairwise(SqEuclidean(1e-12), X, X) - @test pd[1,1] == 0 - @test pd[2,2] == 0 + @test pd[1, 1] == 0 + @test pd[2, 2] == 0 pd = pairwise(SqEuclidean(1e-12), X) - @test pd[1,1] == 0 - @test pd[2,2] == 0 + @test pd[1, 1] == 0 + @test pd[2, 2] == 0 end