From 14bf6224f7c92675b139fbfa5bbd9c93d98d0e8a Mon Sep 17 00:00:00 2001 From: Asaf Manela Date: Thu, 17 Aug 2023 23:16:22 +0300 Subject: [PATCH] Upgrade to StatsBase v0.34 (#59) * import numerical array types from NumericalTypeAliases and not StatsBase * add CompatHelper action * replaced uses of type aliases with their definitions in the same manner as JuliaStats/StatsBase.jl#840 * Include both StatsBase 0.33 and 0.34 in compat Co-authored-by: Alex Arslan --------- Co-authored-by: Alex Arslan --- .github/workflows/CompatHelper.yml | 45 ++++++++++++++++++++ Project.toml | 4 +- src/MLBase.jl | 1 - src/classification.jl | 38 ++++++++--------- src/perfeval.jl | 66 +++++++++++++++--------------- src/utils.jl | 6 +-- 6 files changed, 102 insertions(+), 58 deletions(-) create mode 100644 .github/workflows/CompatHelper.yml diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml new file mode 100644 index 0000000..0918161 --- /dev/null +++ b/.github/workflows/CompatHelper.yml @@ -0,0 +1,45 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: +permissions: + contents: write + pull-requests: write +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Check if Julia is already available in the PATH + id: julia_in_path + run: which julia + continue-on-error: true + - name: Install Julia, but only if it is not already available in the PATH + uses: julia-actions/setup-julia@v1 + with: + version: '1' + arch: ${{ runner.arch }} + if: steps.julia_in_path.outcome != 'success' + - name: "Add the General registry via Git" + run: | + import Pkg + ENV["JULIA_PKG_SERVER"] = "" + Pkg.Registry.add("General") + shell: julia --color=yes {0} + - name: "Install CompatHelper" + run: | + import Pkg + name = "CompatHelper" + uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" + version = "3" + Pkg.add(; name, uuid, version) + shell: julia --color=yes {0} + - name: "Run CompatHelper" + run: | + import CompatHelper + CompatHelper.main() 
+ shell: julia --color=yes {0} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} diff --git a/Project.toml b/Project.toml index 281a34e..1b2fcab 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "MLBase" uuid = "f0e99cf1-93fa-52ec-9ecc-5026115318e0" -version = "0.9.1" +version = "0.9.2" [deps] IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e" @@ -11,7 +11,7 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] IterTools = "1" Reexport = "1" -StatsBase = "0.33" +StatsBase = "0.33, 0.34" julia = "1" [extras] diff --git a/src/MLBase.jl b/src/MLBase.jl index 6cebcd1..abcc443 100644 --- a/src/MLBase.jl +++ b/src/MLBase.jl @@ -8,7 +8,6 @@ module MLBase import Base: length, show, keys, precision, length, getindex import Base: iterate import Base.Order: lt, Ordering, ForwardOrdering, ReverseOrdering, Forward, Reverse - import StatsBase: RealVector, IntegerVector, RealMatrix, IntegerMatrix, RealArray import IterTools: product export diff --git a/src/classification.jl b/src/classification.jl index 7025a44..ea64631 100644 --- a/src/classification.jl +++ b/src/classification.jl @@ -4,7 +4,7 @@ # classify -function classify(x::RealVector, ord::Ordering) +function classify(x::AbstractVector{<:Real}, ord::Ordering) n = length(x) v = x[1] k::Int = 1 @@ -18,9 +18,9 @@ function classify(x::RealVector, ord::Ordering) return k end -classify(x::RealVector) = classify(x, Forward) +classify(x::AbstractVector{<:Real}) = classify(x, Forward) -function classify!(r::IntegerVector, x::RealMatrix, ord::Ordering) +function classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, ord::Ordering) m = size(x, 1) n = size(x, 2) length(r) == n || throw(DimensionMismatch("Mismatched length of r.")) @@ -30,15 +30,15 @@ function classify!(r::IntegerVector, x::RealMatrix, ord::Ordering) return r end -classify!(r::IntegerVector, x::RealMatrix) = classify!(r, x, 
Forward) +classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}) = classify!(r, x, Forward) # - this one throws a deprecation -classify(x::RealMatrix, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, ord) -classify(x::RealMatrix) = classify(x, Forward) +classify(x::AbstractMatrix{<:Real}, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, ord) +classify(x::AbstractMatrix{<:Real}) = classify(x, Forward) # classify with score(s) -function classify_withscore(x::RealVector, ord::Ordering) +function classify_withscore(x::AbstractVector{<:Real}, ord::Ordering) n = length(x) v = x[1] k::Int = 1 @@ -52,9 +52,9 @@ function classify_withscore(x::RealVector, ord::Ordering) return (k, v) end -classify_withscore(x::RealVector) = classify_withscore(x, Forward) +classify_withscore(x::AbstractVector{<:Real}) = classify_withscore(x, Forward) -function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, ord::Ordering) +function classify_withscores!(r::AbstractVector{<:Integer}, s::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}, ord::Ordering) m = size(x, 1) n = size(x, 2) length(r) == n || throw(DimensionMismatch("Mismatched length of r.")) @@ -66,27 +66,27 @@ function classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix, or return (r, s) end -classify_withscores!(r::IntegerVector, s::RealVector, x::RealMatrix) = +classify_withscores!(r::AbstractVector{<:Integer}, s::AbstractVector{<:Real}, x::AbstractMatrix{<:Real}) = classify_withscores!(r, s, x, Forward) -function classify_withscores(x::RealMatrix{T}, ord::Ordering) where T<:Real +function classify_withscores(x::AbstractMatrix{T}, ord::Ordering) where T<:Real n = size(x, 2) r = Array{Int}(undef, n) s = Array{T}(undef, n) return classify_withscores!(r, s, x, ord) end -classify_withscores(x::RealMatrix{T}) where {T<:Real} = classify_withscores(x, Forward) +classify_withscores(x::AbstractMatrix{T}) where {T<:Real} = classify_withscores(x, Forward) # 
classify with threshold -classify(x::RealVector, t::Real, ord::Ordering) = +classify(x::AbstractVector{<:Real}, t::Real, ord::Ordering) = ((k, v) = classify_withscore(x, ord); ifelse(lt(ord, v, t), 0, k)) -classify(x::RealVector, t::Real) = classify(x, t, Forward) +classify(x::AbstractVector{<:Real}, t::Real) = classify(x, t, Forward) -function classify!(r::IntegerVector, x::RealMatrix, t::Real, ord::Ordering) +function classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, t::Real, ord::Ordering) m = size(x, 1) n = size(x, 2) length(r) == n || throw(DimensionMismatch("Mismatched length of r.")) @@ -96,10 +96,10 @@ function classify!(r::IntegerVector, x::RealMatrix, t::Real, ord::Ordering) return r end -classify!(r::IntegerVector, x::RealMatrix, t::Real) = classify!(r, x, t, Forward) +classify!(r::AbstractVector{<:Integer}, x::AbstractMatrix{<:Real}, t::Real) = classify!(r, x, t, Forward) -classify(x::RealMatrix, t::Real, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, t, ord) -classify(x::RealMatrix, t::Real) = classify(x, t, Forward) +classify(x::AbstractMatrix{<:Real}, t::Real, ord::Ordering) = classify!(Array{Int}(undef, size(x,2)), x, t, ord) +classify(x::AbstractMatrix{<:Real}, t::Real) = classify(x, t, Forward) ## label map @@ -154,7 +154,7 @@ labeldecode(lmap::LabelMap{T}, ys::AbstractArray{Int}) where {T} = ## group labels -function groupindices(k::Int, xs::IntegerVector; warning::Bool=true) +function groupindices(k::Int, xs::AbstractVector{<:Integer}; warning::Bool=true) gs = Array{Vector{Int}}(undef, k) for i = 1:k gs[i] = Int[] diff --git a/src/perfeval.jl b/src/perfeval.jl index 4f521e2..c045a76 100644 --- a/src/perfeval.jl +++ b/src/perfeval.jl @@ -2,12 +2,12 @@ ## correctrate & errorrate -correctrate(gt::IntegerVector, r::IntegerVector) = counteq(gt, r) / length(gt) -errorrate(gt::IntegerVector, r::IntegerVector) = countne(gt, r) / length(gt) +correctrate(gt::AbstractVector{<:Integer}, r::AbstractVector{<:Integer}) = 
counteq(gt, r) / length(gt) +errorrate(gt::AbstractVector{<:Integer}, r::AbstractVector{<:Integer}) = countne(gt, r) / length(gt) ## confusion matrix -function confusmat(k::Integer, gts::IntegerVector, preds::IntegerVector) +function confusmat(k::Integer, gts::AbstractVector{<:Integer}, preds::AbstractVector{<:Integer}) n = length(gts) length(preds) == n || throw(DimensionMismatch("Inconsistent lengths.")) R = zeros(Int, k, k) @@ -21,7 +21,7 @@ end ## counthits & hitrate -function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) +function counthits(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, k::Integer) n = length(gt) size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch.")) m = min(size(rklst, 1), Int(k)) @@ -40,7 +40,7 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) return cnt::Int end -function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector) +function counthits(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, ks::AbstractVector{<:Integer}) n = length(gt) size(rklst, 2) == n || throw(DimensionMismatch("Input dimensions mismatch.")) issorted(ks) || throw(DimensionMismatch("ks must be sorted.")) @@ -67,10 +67,10 @@ function counthits(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector) end -hitrate(gt::IntegerVector, rklst::IntegerMatrix, k::Integer) = +hitrate(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, k::Integer) = (counthits(gt, rklst, k) / length(gt))::Float64 -function hitrates(gt::IntegerVector, rklst::IntegerMatrix, ks::IntegerVector) +function hitrates(gt::AbstractVector{<:Integer}, rklst::AbstractMatrix{<:Integer}, ks::AbstractVector{<:Integer}) n = length(gt) h = counthits(gt, rklst, ks) nk = length(ks) @@ -124,7 +124,7 @@ f1score(x::ROCNums) = (tp2 = x.tp + x.tp; tp2 / (tp2 + x.fp + x.fn) ) _ispos(x::Bool) = x _ispos(x::Real) = x > zero(x) -function _roc(gt::IntegerVector, pr) +function 
_roc(gt::AbstractVector{<:Integer}, pr) len = length(gt) length(pr) == len || throw(DimensionMismatch("Inconsistent lengths.")) @@ -159,14 +159,14 @@ function _roc(gt::IntegerVector, pr) end # compute roc numbers based on prediction -roc(gt::IntegerVector, pr::IntegerVector) = _roc(gt, pr) +roc(gt::AbstractVector{<:Integer}, pr::AbstractVector{<:Integer}) = _roc(gt, pr) ## # BinaryThresPredVec immutates a vector: # # v[i] := scores[i] < thres ? 0 : 1 # -struct BinaryThresPredVec{ScoreVec <: RealVector, +struct BinaryThresPredVec{ScoreVec <: AbstractVector{<:Real}, T <: Real, Ord <: Ordering} scores::ScoreVec @@ -178,10 +178,10 @@ length(v::BinaryThresPredVec) = length(v.scores) getindex(v::BinaryThresPredVec, i::Integer) = !lt(v.ord, v.scores[i], v.thres) # compute roc numbers based on scores & threshold -roc(gt::IntegerVector, scores::RealVector, t::Real, ord::Ordering) = +roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, t::Real, ord::Ordering) = _roc(gt, BinaryThresPredVec(scores, t, ord)) -roc(gt::IntegerVector, scores::RealVector, thres::Real) = +roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thres::Real) = roc(gt, scores, thres, Forward) ## @@ -189,8 +189,8 @@ roc(gt::IntegerVector, scores::RealVector, thres::Real) = # # v[i] := scores[i] < thres ? 
0 : preds[i] # -struct ThresPredVec{PredVec <: IntegerVector, - ScoreVec <: RealVector, +struct ThresPredVec{PredVec <: AbstractVector{<:Integer}, + ScoreVec <: AbstractVector{<:Real}, T <: Real, Ord <: Ordering} @@ -201,7 +201,7 @@ struct ThresPredVec{PredVec <: IntegerVector, end function ThresPredVec( - preds::PVec, scores::SVec, thres::T, ord::Ord) where {PVec<:IntegerVector,SVec<:RealVector,T<:Real,Ord<:Ordering} + preds::PVec, scores::SVec, thres::T, ord::Ord) where {PVec<:AbstractVector{<:Integer},SVec<:AbstractVector{<:Real},T<:Real,Ord<:Ordering} n = length(preds) length(scores) == n || throw(DimensionMismatch("Inconsistent lengths.")) ThresPredVec{PVec,SVec,T,Ord}(preds, scores, thres, ord) @@ -211,10 +211,10 @@ length(v::ThresPredVec) = length(v.preds) getindex(v::ThresPredVec, i::Integer) = ifelse(lt(v.ord, v.scores[i], v.thres), 0, v.preds[i]) # compute roc numbers based on predictions & scores & threshold -roc(gt::IntegerVector, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, t::Real, ord::Ordering) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} = _roc(gt, ThresPredVec(preds..., t, ord)) -roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVector,SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thres::Real) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} = roc(gt, preds, thres, Forward) @@ -226,7 +226,7 @@ roc(gt::IntegerVector, preds::Tuple{PV,SV}, thres::Real) where {PV<:IntegerVecto # threshold[i] <= x < threshold[i+1] --> i+1 # x >= threshold[n] --> n+1 # -function find_thresbin(x::Real, thresholds::RealVector, ord::Ordering) +function find_thresbin(x::Real, thresholds::AbstractVector{<:Real}, ord::Ordering) n = length(thresholds) r = 1 if !lt(ord, x, thresholds[1]) @@ -244,16 +244,16 @@ function find_thresbin(x::Real, thresholds::RealVector, ord::Ordering) return 
r::Int end -find_thresbin(x::Real, thresholds::RealVector) = find_thresbin(x, thresholds, Forward) +find_thresbin(x::Real, thresholds::AbstractVector{<:Real}) = find_thresbin(x, thresholds, Forward) -lin_thresholds(scores::RealArray, n::Integer, ord::ForwardOrdering) = +lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ForwardOrdering) = ((s0, s1) = extrema(scores); intv = (s1 - s0) / (n-1); s0:intv:s1) -lin_thresholds(scores::RealArray, n::Integer, ord::ReverseOrdering{ForwardOrdering}) = +lin_thresholds(scores::AbstractArray{<:Real}, n::Integer, ord::ReverseOrdering{ForwardOrdering}) = ((s0, s1) = extrema(scores); intv = (s0 - s1) / (n-1); s1:intv:s0) # roc for binary predictions -function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord::Ordering) +function roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thresholds::AbstractVector{<:Real}, ord::Ordering) issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.") ns = length(scores) @@ -291,19 +291,19 @@ function roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector, ord: return r end -roc(gt::IntegerVector, scores::RealVector, thresholds::RealVector) = roc(gt, scores, thresholds, Forward) +roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, thresholds::AbstractVector{<:Real}) = roc(gt, scores, thresholds, Forward) -roc(gt::IntegerVector, scores::RealVector, n::Integer, ord::Ordering) = +roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, n::Integer, ord::Ordering) = roc(gt, scores, lin_thresholds(scores, n, ord), ord) -roc(gt::IntegerVector, scores::RealVector, n::Integer) = roc(gt, scores, n, Forward) +roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, n::Integer) = roc(gt, scores, n, Forward) -roc(gt::IntegerVector, scores::RealVector, ord::Ordering) = roc(gt, scores, 100, ord) -roc(gt::IntegerVector, scores::RealVector) = roc(gt, scores, Forward) 
+roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}, ord::Ordering) = roc(gt, scores, 100, ord) +roc(gt::AbstractVector{<:Integer}, scores::AbstractVector{<:Real}) = roc(gt, scores, Forward) # roc for multi-way predictions function roc( - gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector, ord::Ordering) where {PV<:IntegerVector,SV<:RealVector} + gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thresholds::AbstractVector{<:Real}, ord::Ordering) where {PV<:AbstractVector{<:Integer},SV<:AbstractVector{<:Real}} issorted(thresholds, ord) || error("thresholds must be sorted w.r.t. the given ordering.") pr::PV = preds[1] @@ -354,17 +354,17 @@ function roc( return r end -roc(gt::IntegerVector, preds::Tuple{PV,SV}, thresholds::RealVector) where {PV<:IntegerVector, SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, thresholds::AbstractVector{<:Real}) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} = roc(gt, preds, thresholds, Forward) -roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, n::Integer, ord::Ordering) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} = roc(gt, preds, lin_thresholds(preds[2],n,ord), ord) -roc(gt::IntegerVector, preds::Tuple{PV,SV}, n::Integer) where {PV<:IntegerVector, SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, n::Integer) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} = roc(gt, preds, n, Forward) -roc(gt::IntegerVector, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:IntegerVector, SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, preds::Tuple{PV,SV}, ord::Ordering) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} = roc(gt, preds, 100, ord) -roc(gt::IntegerVector, preds::Tuple{PV,SV}) where {PV<:IntegerVector, SV<:RealVector} = +roc(gt::AbstractVector{<:Integer}, 
preds::Tuple{PV,SV}) where {PV<:AbstractVector{<:Integer}, SV<:AbstractVector{<:Real}} = roc(gt, preds, Forward) diff --git a/src/utils.jl b/src/utils.jl index a0f3e99..13b27f8 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -15,7 +15,7 @@ function repeach(x::AbstractVector{T}, n::Integer) where T return r end -function repeach(x::AbstractVector{T}, ns::IntegerVector) where T +function repeach(x::AbstractVector{T}, ns::AbstractVector{<:Integer}) where T k = length(x) length(ns) == k || throw(DimensionMismatch("length(ns) should be equal to k.")) r = Array{T}(undef, sum(ns)) @@ -46,7 +46,7 @@ function repeachcol(x::DenseArray{T,2}, n::Integer) where T return r end -function repeachcol(x::DenseArray{T,2}, ns::IntegerVector) where T +function repeachcol(x::DenseArray{T,2}, ns::AbstractVector{<:Integer}) where T m = size(x, 1) k = size(x, 2) r = zeros(T, m, sum(ns)) @@ -80,7 +80,7 @@ function repeachrow(x::DenseArray{T,2}, n::Integer) where T return r end -function repeachrow(x::DenseArray{T,2}, ns::IntegerVector) where T +function repeachrow(x::DenseArray{T,2}, ns::AbstractVector{<:Integer}) where T k = size(x, 1) m = size(x, 2) r = Array{T}(undef, sum(ns), m)