Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ddof to PowerDivergenceTest and ChisqTest to allow changes to degrees of freedom #288

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
45 changes: 25 additions & 20 deletions src/power_divergence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -237,14 +237,17 @@ end
# Under regularity conditions, their asymptotic distributions are all the same (Drost 1989)
# Chi-squared null approximation works best for lambda near 2/3
"""
PowerDivergenceTest(x[, y]; lambda = 1.0, theta0 = ones(length(x))/length(x))
PowerDivergenceTest(x[, y]; lambda = 1.0, theta0 = ones(length(x))/length(x), ddof = 0)

Perform a Power Divergence test.

If `y` is not given and `x` is a matrix with one row or column, or `x` is a vector, then
a goodness-of-fit test is performed (`x` is treated as a one-dimensional contingency
table). In this case, the hypothesis tested is whether the population probabilities equal
those in `theta0`, or are all equal if `theta0` is not given.
those in `theta0`, or are all equal if `theta0` is not given. The `ddof` parameter is the
"delta degrees of freedom" adjustment to the number of degrees of freedom used for
calculation of p-values. The number of degrees of freedom is decreased by `ddof`.
sprague252 marked this conversation as resolved.
Show resolved Hide resolved


If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
Expand Down Expand Up @@ -284,7 +287,7 @@ Implements: [`pvalue`](@ref), [`confint(::PowerDivergenceTest)`](@ref)

* Agresti, Alan. Categorical Data Analysis, 3rd Edition. Wiley, 2013.
"""
function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat}
function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x), ddof::Int64=0) where {T<:Integer,U<:AbstractFloat}
sprague252 marked this conversation as resolved.
Show resolved Hide resolved

nrows, ncols = size(x)
n = sum(x)
Expand All @@ -297,13 +300,13 @@ function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector
if nrows > 1 && ncols > 1
rowsums = sum(x, dims=2)
colsums = sum(x, dims=1)
df = (nrows - 1) * (ncols - 1)
df = (nrows - 1) * (ncols - 1) - ddof
thetahat = x ./ n
xhat = rowsums * colsums / n
theta0 = xhat / n
V = Float64[(colsums[j]/n) * (rowsums[i]/n) * (1 - rowsums[i]/n) * (n - colsums[j]) for i in 1:nrows, j in 1:ncols]
elseif nrows == 1 || ncols == 1
df = length(x) - 1
df = length(x) - 1 - ddof
xhat = reshape(n * theta0, size(x))
thetahat = x / n
V = reshape(n .* theta0 .* (1 .- theta0), size(x))
Expand Down Expand Up @@ -337,18 +340,18 @@ end
#convenience functions

#PDT
function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; lambda::U=1.0) where {T<:Integer,U<:AbstractFloat}
function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; lambda::U=1.0, ddof::Int64=0) where {T<:Integer,U<:AbstractFloat}
sprague252 marked this conversation as resolved.
Show resolved Hide resolved
d = counts(x, y, levels)
PowerDivergenceTest(d, lambda=lambda)
PowerDivergenceTest(d, lambda=lambda, ddof=ddof)
end

function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; lambda::U=1.0) where {T<:Integer,U<:AbstractFloat}
function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; lambda::U=1.0, ddof::Int64=0) where {T<:Integer,U<:AbstractFloat}
d = counts(x, y, k)
PowerDivergenceTest(d, lambda=lambda)
PowerDivergenceTest(d, lambda=lambda, ddof=ddof)
end

PowerDivergenceTest(x::AbstractVector{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
PowerDivergenceTest(reshape(x, length(x), 1), lambda=lambda, theta0=theta0)
PowerDivergenceTest(x::AbstractVector{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x), ddof::Int64=0) where {T<:Integer,U<:AbstractFloat} =
PowerDivergenceTest(reshape(x, length(x), 1), lambda=lambda, theta0=theta0, ddof=ddof)

#ChisqTest
"""
Expand All @@ -360,7 +363,9 @@ with ``λ = 1``).
If `y` is not given and `x` is a matrix with one row or column, or `x` is a vector, then
a goodness-of-fit test is performed (`x` is treated as a one-dimensional contingency
table). In this case, the hypothesis tested is whether the population probabilities equal
those in `theta0`, or are all equal if `theta0` is not given.
those in `theta0`, or are all equal if `theta0` is not given. The `ddof` parameter is the
"delta degrees of freedom" adjustment to the number of degrees of freedom used for
calculation of p-values. The number of degrees of freedom is decreased by `ddof`.

If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
Expand All @@ -373,22 +378,22 @@ Note that the entries of `x` (and `y` if provided) must be non-negative integers

Implements: [`pvalue`](@ref), [`confint`](@ref)
"""
function ChisqTest(x::AbstractMatrix{T}) where T<:Integer
PowerDivergenceTest(x, lambda=1.0)
function ChisqTest(x::AbstractMatrix{T}; ddof::Int64=0) where T<:Integer
PowerDivergenceTest(x, lambda=1.0, ddof=ddof)
end

function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}) where T<:Integer
function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; ddof::Int64=0) where T<:Integer
d = counts(x, y, levels)
PowerDivergenceTest(d, lambda=1.0)
PowerDivergenceTest(d, lambda=1.0, ddof=ddof)
end

function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where T<:Integer
function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; ddof::Int64=0) where T<:Integer
d = counts(x, y, k)
PowerDivergenceTest(d, lambda=1.0)
PowerDivergenceTest(d, lambda=1.0, ddof=ddof)
end

ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0)
ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x); ddof::Int64=0) where {T<:Integer,U<:AbstractFloat} =
PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0, ddof=ddof)

#MultinomialLRTest
"""
Expand Down
6 changes: 6 additions & 0 deletions test/power_divergence.jl
Original file line number Diff line number Diff line change
Expand Up @@ -198,4 +198,10 @@ MultinomialLRTest(x,y,(1:3,1:3))
d = [113997 1031298
334453 37471]
PowerDivergenceTest(d)

# Test ddof for the ChisqTest
sprague252 marked this conversation as resolved.
Show resolved Hide resolved
x = [8,10,16,6]
probs = [0.15865525393145702, 0.341344746068543, 0.34134474606854304, 0.15865525393145702]
m = ChisqTest(x, probs, ddof=2)
@test pvalue(m) ≈ 0.17603510054227095
end