Skip to content

Commit

Permalink
Merge branch 'master' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
aplavin authored May 16, 2024
2 parents dafd32b + 4e25f43 commit 9913c61
Show file tree
Hide file tree
Showing 11 changed files with 123 additions and 87 deletions.
17 changes: 16 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,24 @@ jobs:
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
- uses: julia-actions/julia-processcoverage@v1
- uses: codecov/codecov-action@v3
- uses: codecov/codecov-action@v4
with:
file: lcov.info
# This is a public repo.
# We are using v4 of the codecov-action.
# If the PR is from a fork, then Codecov allows us to use
# tokenless Codecov uploads.
# If the PR is not from a fork, then Codecov does not allow
# us to use tokenless Codecov uploads, and thus we must use
# the token.
# Tokenless Codecov uploads are prone to random failures,
# due to Codecov's servers hitting GitHub rate limits.
# Therefore:
# 1. If this CI run is not a PR run, we set `fail_ci_if_error` to `true`.
# 2. If this CI run is a PR run and the PR is not from a fork, we set `fail_ci_if_error` to `true`.
# 3. If this CI run is a PR run and the PR is from a fork, we set `fail_ci_if_error` to `false`.
fail_ci_if_error: ${{ (github.event_name != 'pull_request') || (github.repository == github.event.pull_request.head.repo.full_name) }}
token: ${{ secrets.CODECOV_TOKEN }}
docs:
permissions:
contents: write
Expand Down
9 changes: 8 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
name = "StatsBase"
uuid = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
authors = ["JuliaStats"]
version = "0.34.2"
version = "0.34.3"

[deps]
AliasTables = "66dad0bd-aa9a-41b7-9441-69ab47430ed8"
DataAPI = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Expand All @@ -17,11 +18,17 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0"

[compat]
AliasTables = "1"
DataAPI = "1"
DataStructures = "0.10, 0.11, 0.12, 0.13, 0.14, 0.17, 0.18"
LinearAlgebra = "<0.0.1, 1"
LogExpFunctions = "0.3"
Missings = "0.3, 0.4, 1.0"
Printf = "<0.0.1, 1"
Random = "<0.0.1, 1"
SortingAlgorithms = "0.3, 1.0"
SparseArrays = "<0.0.1, 1"
Statistics = "1"
StatsAPI = "1.2"
julia = "1"

Expand Down
62 changes: 62 additions & 0 deletions src/deprecates.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,65 @@ end
@deprecate stdm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) std(x, w, dim, mean=m, corrected=corrected) false
@deprecate varm(x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) var(x, w, dim, mean=m, corrected=corrected) false
@deprecate varm!(R::AbstractArray, x::AbstractArray{<:Real}, w::AbstractWeights, m::AbstractArray{<:Real}, dim::Int; corrected::Union{Bool, Nothing}=nothing) var!(R, x, w, dim, mean=m, corrected=corrected) false

### This was never part of the public API
### Deprecated April 2024
function make_alias_table!(w::AbstractVector, wsum,
                           a::AbstractVector{Float64},
                           alias::AbstractVector{Int})
    Base.depwarn("make_alias_table! is both internal and deprecated, use AliasTables.jl instead", :make_alias_table!)
    # Build the acceptance/alias tables for alias-method sampling.
    #
    # Inputs:
    #   w     - weights
    #   wsum  - pre-computed sum(w)
    #
    # Outputs (overwritten in place):
    #   a     - acceptance probabilities
    #   alias - alias table
    #
    # `a` and `w` may be the same array; it is then overwritten with the
    # acceptance probabilities.
    #
    # Returns nothing.

    n = length(w)
    if !(length(a) == length(alias) == n)
        throw(DimensionMismatch("Inconsistent array lengths."))
    end

    # Rescale the weights so that their mean is one.
    scale = n / wsum
    @inbounds for idx in 1:n
        a[idx] = w[idx] * scale
    end

    # Work stacks of indices whose scaled weight is above / below one.
    overfull = Int[]
    underfull = Int[]
    sizehint!(overfull, n)
    sizehint!(underfull, n)

    @inbounds for idx in 1:n
        p = a[idx]
        if p > 1.0
            push!(overfull, idx)
        elseif p < 1.0
            push!(underfull, idx)
        end
    end

    # Pair one underfull slot with one overfull slot until either stack is empty.
    while !isempty(overfull) && !isempty(underfull)
        small = pop!(underfull)
        large = pop!(overfull)
        @inbounds alias[small] = large
        @inbounds rest = a[large] = (a[large] - 1.0) + a[small]
        push!(rest > 1.0 ? overfull : underfull, large)
    end

    # Leftover underfull slots should only exist because of rounding.
    @inbounds for small in underfull
        a[small] = 1.0
    end
    return nothing
end
4 changes: 3 additions & 1 deletion src/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ run lengths.
"""
function inverse_rle(vals::AbstractVector{T}, lens::AbstractVector{<:Integer}) where T
m = length(vals)
length(lens) == m || raise_dimerror()
mlens = length(lens)
mlens == m || throw(DimensionMismatch(
"number of vals ($m) does not match the number of lens ($mlens)"))
n = sum(lens)
n >= 0 || throw(ArgumentError("lengths must be non-negative"))

Expand Down
5 changes: 4 additions & 1 deletion src/ranking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

# Check that `x` and `p` have the same number of elements as `rks` and return
# that common length. Throws a `DimensionMismatch` reporting all three lengths
# when they disagree.
function _check_randparams(rks, x, p)
    n = length(rks)
    nx = length(x)
    np = length(p)
    nx == np == n || throw(
        DimensionMismatch("lengths of x $nx and p $np do not match that of ranks $n"))
    return n
end

Expand Down
85 changes: 10 additions & 75 deletions src/sampling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#
###########################################################

using AliasTables
using Random: Sampler

if VERSION < v"1.3.0-DEV.565"
Expand Down Expand Up @@ -637,65 +638,6 @@ end
direct_sample!(a::AbstractArray, wv::AbstractWeights, x::AbstractArray) =
direct_sample!(default_rng(), a, wv, x)

function make_alias_table!(w::AbstractVector, wsum,
                           a::AbstractVector{Float64},
                           alias::AbstractVector{Int})
    # Build the acceptance/alias tables for alias-method sampling.
    #
    # Inputs:
    #   w     - weights
    #   wsum  - pre-computed sum(w)
    #
    # Outputs (overwritten in place):
    #   a     - acceptance probabilities
    #   alias - alias table
    #
    # `a` and `w` may be the same array; it is then overwritten with the
    # acceptance probabilities.
    #
    # Returns nothing.

    n = length(w)
    if !(length(a) == length(alias) == n)
        throw(DimensionMismatch("Inconsistent array lengths."))
    end

    # Rescale the weights so that their mean is one.
    scale = n / wsum
    @inbounds for idx in 1:n
        a[idx] = w[idx] * scale
    end

    # Work stacks of indices whose scaled weight is above / below one.
    overfull = Int[]
    underfull = Int[]
    sizehint!(overfull, n)
    sizehint!(underfull, n)

    @inbounds for idx in 1:n
        p = a[idx]
        if p > 1.0
            push!(overfull, idx)
        elseif p < 1.0
            push!(underfull, idx)
        end
    end

    # Pair one underfull slot with one overfull slot until either stack is empty.
    while !isempty(overfull) && !isempty(underfull)
        small = pop!(underfull)
        large = pop!(overfull)
        @inbounds alias[small] = large
        @inbounds rest = a[large] = (a[large] - 1.0) + a[small]
        push!(rest > 1.0 ? overfull : underfull, large)
    end

    # Leftover underfull slots should only exist because of rounding.
    @inbounds for small in underfull
        a[small] = 1.0
    end
    return nothing
end

"""
alias_sample!([rng], a::AbstractArray, wv::AbstractWeights, x::AbstractArray)
Expand All @@ -706,31 +648,24 @@ Build an alias table, and sample therefrom.
Reference: Walker, A. J. "An Efficient Method for Generating Discrete Random Variables
with General Distributions." *ACM Transactions on Mathematical Software* 3 (3): 253, 1977.
Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(n \\log n)`` time
for building the alias table, and then ``O(1)`` to draw each sample. It consumes ``2 k`` random numbers.
Noting `k=length(x)` and `n=length(a)`, this algorithm takes ``O(n)`` time
for building the alias table, and then ``O(1)`` to draw each sample. It consumes ``k`` random numbers.
"""
function alias_sample!(rng::AbstractRNG, a::AbstractArray, wv::AbstractWeights, x::AbstractArray)
    # The output must not overlap either input: it is written while `a` is
    # still being read, and the table is built from `wv`.
    Base.mightalias(a, x) &&
        throw(ArgumentError("output array x must not share memory with input array a"))
    Base.mightalias(x, wv) &&
        throw(ArgumentError("output array x must not share memory with weights array wv"))
    # Only `a` and `wv` need 1-based indexing; `x` is traversed with `eachindex`.
    1 == firstindex(a) == firstindex(wv) ||
        throw(ArgumentError("non 1-based arrays are not supported"))
    isfinite(sum(wv)) || throw(ArgumentError("only finite weights are supported"))
    length(wv) == length(a) || throw(DimensionMismatch("Inconsistent lengths."))

    # create alias table
    at = AliasTable(wv)

    # sampling: each draw from the table is used as an index into `a`
    for i in eachindex(x)
        j = rand(rng, at)
        x[i] = a[j]
    end
    return x
end
Expand Down
10 changes: 8 additions & 2 deletions src/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,7 @@ Return the standard error of the mean for a collection `x`.
A pre-computed `mean` may be provided.
When not using weights, this is the (sample) standard deviation
divided by the sample size. If weights are used, the
divided by the square root of the sample size. If weights are used, the
variance of the sample mean is calculated as follows:
* `AnalyticWeights`: Not implemented.
Expand Down Expand Up @@ -742,7 +742,13 @@ function entropy(p)
return -sum(xlogx, p)
end

# Entropy of `p` expressed in base `b`: the single-argument `entropy(p)`
# divided by `log(b)`.
function entropy(p, b::Real)
    e = entropy(p)
    # Promote explicitly before applying `log` to avoid undesired promotions
    # with `log(b)::Float64` arising from `b::Int` (ref: #924)
    _b = first(promote(b, e))
    return e / log(_b)
end

"""
renyientropy(p, α)
Expand Down
1 change: 1 addition & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ z = [1, 1, 2, 2, 2, 3, 1, 2, 2, 3, 3, 3, 3]
@test lens == [2, 3, 1, 1, 2, 4]
@test inverse_rle(vals, lens) == z
@test_throws ArgumentError inverse_rle(vals, fill(-1, length(lens)))
@test_throws DimensionMismatch inverse_rle(vals, [1])

z = [true, true, false, false, true, false, true, true, true]
vals, lens = rle(z)
Expand Down
3 changes: 3 additions & 0 deletions test/ranking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,6 @@ s = ["c", "a", "b", "d", "d", "b", "e", "d"] # s is a vector of strings ordered
@test tiedrank(s) == tiedrank(x)
@test tiedrank(x, rev = true) == tiedrank(-x)
@test tiedrank(x, lt = (x, y) -> isless(y, x)) == tiedrank(-x)


@test_throws DimensionMismatch StatsBase._check_randparams([1,2], [1,2], [1])
11 changes: 5 additions & 6 deletions test/scalarstats.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,12 +222,7 @@ x = sort!(vcat([5:-1:i for i in 1:5]...))
@test mad(Iterators.repeated(4, 10)) == 0
@test mad(Integer[1,2,3,4]) === mad(1:4)
let itr = (i for i in 1:10000)
if VERSION >= v"1.10.0-"
# FIXME: Allocations are closer to 10x this on 1.10
@test_broken (@benchmark mad($itr)).allocs < 200
else
@test (@benchmark mad($itr)).allocs < 200
end
@test (@benchmark mad($itr)).allocs < 200
end

# Issue 197
Expand Down Expand Up @@ -265,6 +260,10 @@ it = (xᵢ for xᵢ in x)
@test @inferred(entropy([1//2, 1//2], 2)) 1.0
@test @inferred(entropy([0.2, 0.3, 0.5], 2)) 1.4854752972273344

# issue #924
@test @inferred(entropy([0.5f0, 0.5f0], 2)) isa Float32
@test @inferred(entropy([0.5f0, 0.5f0], MathConstants.e)) isa Float32

@test_throws ArgumentError @inferred(entropy(Float64[]))
@test_throws ArgumentError @inferred(entropy(Int[]))

Expand Down
3 changes: 3 additions & 0 deletions test/wsampling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ for wv in (
check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false)
end

@test_throws ArgumentError alias_sample!(rand(10), weights(fill(0, 10)), rand(10))
@test_throws ArgumentError alias_sample!(rand(100), weights(randn(100)), rand(10))

for rev in (true, false), T in (Int, Int16, Float64, Float16, BigInt, ComplexF64, Rational{Int})
r = rev ? reverse(4:7) : (4:7)
r = T===Int ? r : T.(r)
Expand Down

0 comments on commit 9913c61

Please sign in to comment.