JuliaStats · sprague252 · Mar 2, 2023 · Mar 2, 2023 · Mar 2, 2023 · Mar 13, 2023
diff --git a/src/power_divergence.jl b/src/power_divergence.jl
@@ -38,7 +38,8 @@ function testname(x::PowerDivergenceTest)
 end
 
 # parameter of interest: name, value under h0, point estimate
-population_param_of_interest(x::PowerDivergenceTest) = ("Multinomial Probabilities", x.theta0, x.thetahat)
+population_param_of_interest(x::PowerDivergenceTest) = ("Multinomial Probabilities",
+	x.theta0, x.thetahat)
 default_tail(test::PowerDivergenceTest) = :right
 
 pvalue(x::PowerDivergenceTest; tail=:right) = pvalue(Chisq(x.df),x.stat; tail=tail)
@@ -49,8 +50,8 @@ pvalue(x::PowerDivergenceTest; tail=:right) = pvalue(Chisq(x.df),x.stat; tail=ta
 Compute a confidence interval with coverage `level` for multinomial proportions using
 one of the following methods. Possible values for `method` are:
 
-  - `:auto` (default): If the minimum of the expected cell counts exceeds 100, Quesenberry-Hurst
-    intervals are used, otherwise Sison-Glaz.
+  - `:auto` (default): If the minimum of the expected cell counts exceeds 100,
+  	Quesenberry-Hurst intervals are used, otherwise Sison-Glaz.
   - `:sison_glaz`: Sison-Glaz intervals
   - `:bootstrap`: Bootstrap intervals
   - `:quesenberry_hurst`: Quesenberry-Hurst intervals
@@ -59,8 +60,9 @@ one of the following methods. Possible values for `method` are:
 # References
 
   * Agresti, Alan. Categorical Data Analysis, 3rd Edition. Wiley, 2013.
-  * Sison, C.P and Glaz, J. Simultaneous confidence intervals and sample size determination
-    for multinomial proportions. Journal of the American Statistical Association,
+  * Sison, C.P and Glaz, J. Simultaneous confidence intervals and sample size
+  	determination for multinomial proportions. Journal of the American Statistical
+  	Association,
     90:366-369, 1995.
   * Quesensberry, C.P. and Hurst, D.C. Large Sample Simultaneous Confidence Intervals for
     Multinational Proportions. Technometrics, 6:191-195, 1964.
@@ -102,28 +104,35 @@ end
 
 # Bootstrap
 function ci_bootstrap(x::PowerDivergenceTest,alpha::Float64, iters::Int64)
-    m = mapslices(x -> quantile(x, [alpha / 2, 1 - alpha / 2]), rand(Multinomial(x.n, convert(Vector{Float64}, x.thetahat)),iters) / x.n, dims=2)
-    Tuple{Float64,Float64}[(boundproportion(m[i,1]), boundproportion(m[i,2])) for i in 1:length(x.thetahat)]
+    m = mapslices(x -> quantile(x, [alpha / 2, 1 - alpha / 2]), rand(Multinomial(x.n,
+    	convert(Vector{Float64}, x.thetahat)),iters) / x.n, dims=2)
+    Tuple{Float64,Float64}[(boundproportion(m[i,1]), boundproportion(m[i,2])) for i in
+    	1:length(x.thetahat)]
 end
 
 # Quesenberry, Hurst (1964)
 function ci_quesenberry_hurst(x::PowerDivergenceTest, alpha::Float64; GC::Bool=true)
     m  = length(x.thetahat)
     cv = GC ? quantile(Chisq(1), 1 - alpha / m) : quantile(Chisq(m - 1), 1 - alpha)
 
-    u = (cv .+ 2 .* x.observed .+ sqrt.(cv .* (cv .+ 4 .* x.observed .* (x.n .- x.observed) ./ x.n))) ./ (2 .* (x.n .+ cv))
-    l = (cv .+ 2 .* x.observed .- sqrt.(cv .* (cv .+ 4 .* x.observed .* (x.n .- x.observed) ./ x.n))) ./ (2 .* (x.n .+ cv))
+    u = (cv .+ 2 .* x.observed .+ sqrt.(cv .* (cv .+ 4 .* x.observed .* (x.n .-
+    	x.observed) ./ x.n))) ./ (2 .* (x.n .+ cv))
+    l = (cv .+ 2 .* x.observed .- sqrt.(cv .* (cv .+ 4 .* x.observed .* (x.n .-
+    	x.observed) ./ x.n))) ./ (2 .* (x.n .+ cv))
     Tuple{Float64,Float64}[(boundproportion(l[j]), boundproportion(u[j])) for j in 1:m]
 end
 
 # asymptotic simultaneous confidence interval
 # Gold (1963)
-function ci_gold(x::PowerDivergenceTest, alpha::Float64; correct::Bool=true, GC::Bool=true)
+function ci_gold(x::PowerDivergenceTest, alpha::Float64; correct::Bool=true,
+	GC::Bool=true)
     m  = length(x.thetahat)
     cv = GC ? quantile(Chisq(1), 1 - alpha / 2m) : quantile(Chisq(m - 1), 1 - alpha / 2)
 
-    u = [x.thetahat[j] + sqrt.(cv * x.thetahat[j] * (1 - x.thetahat[j]) / x.n) + ifelse(correct, inv(2x.n), 0) for j in 1:m]
-    l = [x.thetahat[j] - sqrt.(cv * x.thetahat[j] * (1 - x.thetahat[j]) / x.n) - ifelse(correct, inv(2x.n), 0) for j in 1:m]
+    u = [x.thetahat[j] + sqrt.(cv * x.thetahat[j] * (1 - x.thetahat[j]) / x.n) +
+    	ifelse(correct, inv(2x.n), 0) for j in 1:m]
+    l = [x.thetahat[j] - sqrt.(cv * x.thetahat[j] * (1 - x.thetahat[j]) / x.n) -
+    	ifelse(correct, inv(2x.n), 0) for j in 1:m]
     Tuple{Float64,Float64}[ (boundproportion(l[j]), boundproportion(u[j])) for j in 1:m]
 end
 
@@ -172,7 +181,8 @@ function ci_sison_glaz(x::PowerDivergenceTest, alpha::Float64; skew_correct::Boo
             m1[i] = mu[1]
             m2[i] = mu[2] + mu[1] - mu[1]^2
             m3[i] = mu[3] + mu[2] * (3 - 3mu[1]) + (mu[1] - 3mu[1]^2 + 2mu[1]^3)
-            m4[i] = mu[4] + mu[3] * (6 - 4mu[1]) + mu[2] * (7 - 12mu[1] + 6mu[1]^2) + mu[1] - 4mu[1]^2 + 6mu[1]^3 - 3mu[1]^4
+            m4[i] = (mu[4] + mu[3] * (6 - 4mu[1]) + mu[2] * (7 - 12mu[1] + 6mu[1]^2) +
+            	mu[1] - 4mu[1]^2 + 6mu[1]^3 - 3mu[1]^4)
             m5[i] = den
         end
         for i in 1:k
@@ -183,7 +193,8 @@ function ci_sison_glaz(x::PowerDivergenceTest, alpha::Float64; skew_correct::Boo
         g1 = s3 / s2^(3/2)
         g2 = s4 / s2^2
 
-        poly = 1 + g1 * (z^3 - 3z) / 6 + g2 * (z^4 - 6z^2 + 3) / 24 + g1^2 * (z^6 - 15z^4 + 45z^2 - 15) / 72
+        poly = 1 + g1 * (z^3 - 3z) / 6 + g2 * (z^4 - 6z^2 + 3) / 24 + g1^2 * (z^6 - 15z^4
+        	+ 45z^2 - 15) / 72
         f = poly * exp(-z^2 / 2) / sqrt(2π)
         probx = prod(m5)
 
@@ -220,45 +231,55 @@ function ci_sison_glaz(x::PowerDivergenceTest, alpha::Float64; skew_correct::Boo
         out[i,5] = min(x.thetahat[i] + cn + onen, 1)
     end
     if skew_correct
-        return Tuple{Float64,Float64}[(boundproportion(out[i,2]), boundproportion(out[i,3])) for i in 1:k]
+        return Tuple{Float64,Float64}[(boundproportion(out[i,2]),
+        	boundproportion(out[i,3])) for i in 1:k]
     else
-        return Tuple{Float64,Float64}[(boundproportion(out[i,4]), boundproportion(out[i,5])) for i in 1:k]
+        return Tuple{Float64,Float64}[(boundproportion(out[i,4]),
+        	boundproportion(out[i,5])) for i in 1:k]
     end
 end
 
 # power divergence statistic for testing goodness of fit
 # Cressie and Read 1984; Read and Cressie 1988
 # lambda    =  1: Pearson's Chi-square statistic
 # lambda ->  0: Converges to Likelihood Ratio test stat
-# lambda -> -1: Converges to minimum discrimination information statistic (Gokhale, Kullback 1978)
+# lambda -> -1: Converges to minimum discrimination information statistic (Gokhale,
+# 	Kullback 1978)
 # lambda =  -2: Neyman Modified chi-squared statistic (Neyman 1949)
 # lambda = -.5: Freeman-Tukey statistic (Freeman, Tukey 1950)
 
-# Under regularity conditions, their asymptotic distributions are all the same (Drost 1989)
+# Under regularity conditions, their asymptotic distributions are all the same (Drost
+# 	1989)
 # Chi-squared null approximation works best for lambda near 2/3
 """
-    PowerDivergenceTest(x[, y]; lambda = 1.0, theta0 = ones(length(x))/length(x))
+    PowerDivergenceTest(x[, y]; lambda = 1.0, theta0 = ones(length(x))/length(x), ddof =
+    	0)
 
 Perform a Power Divergence test.
 
 If `y` is not given and `x` is a matrix with one row or column, or `x` is a vector, then
 a goodness-of-fit test is performed (`x` is treated as a one-dimensional contingency
 table). In this case, the hypothesis tested is whether the population probabilities equal
-those in `theta0`, or are all equal if `theta0` is not given.
+those in `theta0`, or are all equal if `theta0` is not given. 
+
 
 If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
-contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
-table is calculated using the `counts` function from the `StatsBase` package. Then the power
-divergence test is conducted under the null hypothesis that the joint distribution of the
-cell counts in a 2-dimensional contingency table is the product of the row and column
-marginals.
+contingency table. Otherwise, `x` and `y` must be vectors of the same length. The
+contingency table is calculated using the `counts` function from the `StatsBase` package.
+Then the power divergence test is conducted under the null hypothesis that the joint
+distribution of the cell counts in a 2-dimensional contingency table is the product of the
+row and column marginals.
 
 Note that the entries of `x` (and `y` if provided) must be non-negative integers.
 
-Computed confidence intervals by default are Quesenberry-Hurst intervals
-if the minimum of the expected cell counts exceeds 100, and Sison-Glaz intervals otherwise.
-See the [`confint(::PowerDivergenceTest)`](@ref) documentation for a list of
-supported methods to compute confidence intervals.
+The `ddof` parameter is the "delta degrees of freedom" adjustment to the number of degrees
+of freedom used for calculation of p-values. The number of degrees of freedom is decreased
+by `ddof`.
+
+Computed confidence intervals by default are Quesenberry-Hurst intervals if the minimum of
+the expected cell counts exceeds 100, and Sison-Glaz intervals otherwise. See the
+[`confint(::PowerDivergenceTest)`](@ref) documentation for a list of supported methods to
+compute confidence intervals.
 
 The power divergence test is given by
 ```math
@@ -284,31 +305,36 @@ Implements: [`pvalue`](@ref), [`confint(::PowerDivergenceTest)`](@ref)
 
   * Agresti, Alan. Categorical Data Analysis, 3rd Edition. Wiley, 2013.
 """
-function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat}
+function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector{U} =
+	ones(length(x))/length(x), ddof::Integer=0) where {T<:Integer,U<:AbstractFloat}
 
     nrows, ncols = size(x)
     n = sum(x)
 
     #validate date
-    (any(x .< 0) || any(!isfinite, x)) && throw(ArgumentError("all entries must be nonnegative and finite"))
+    (any(x .< 0) || any(!isfinite, x)) && throw(ArgumentError("all entries must be
+    	nonnegative and finite"))
     n == 0 && throw(ArgumentError("at least one entry must be positive"))
-    (!isfinite(nrows) || !isfinite(ncols) || !isfinite(nrows*ncols)) && throw(ArgumentError("invalid number of rows or columns"))
+    (!isfinite(nrows) || !isfinite(ncols) || !isfinite(nrows*ncols)) &&
+    	throw(ArgumentError("invalid number of rows or columns"))
 
     if nrows > 1 && ncols > 1
         rowsums = sum(x, dims=2)
         colsums = sum(x, dims=1)
-        df = (nrows - 1) * (ncols - 1)
+        df = (nrows - 1) * (ncols - 1) - ddof
         thetahat = x ./ n
         xhat = rowsums * colsums / n
         theta0 = xhat / n
-        V = Float64[(colsums[j]/n) * (rowsums[i]/n) * (1 - rowsums[i]/n) * (n - colsums[j]) for i in 1:nrows, j in 1:ncols]
+        V = Float64[(colsums[j]/n) * (rowsums[i]/n) * (1 - rowsums[i]/n) * (n -
+        	colsums[j]) for i in 1:nrows, j in 1:ncols]
     elseif nrows == 1 || ncols == 1
-        df = length(x) - 1
+        df = length(x) - 1 - ddof
         xhat = reshape(n * theta0, size(x))
         thetahat = x / n
         V = reshape(n .* theta0 .* (1 .- theta0), size(x))
 
-        abs(1 - sum(theta0)) <= sqrt(eps()) || throw(ArgumentError("Probabilities must sum to one"))
+        abs(1 - sum(theta0)) <= sqrt(eps()) || 
+            throw(ArgumentError("Probabilities must sum to one"))
     else
         throw(ArgumentError("Number of rows or columns must be at least 1"))
     end
@@ -331,24 +357,29 @@ function PowerDivergenceTest(x::AbstractMatrix{T}; lambda::U=1.0, theta0::Vector
         stat *= 2 / (lambda * (lambda + 1))
     end
 
-    PowerDivergenceTest(lambda, vec(theta0), stat, df, x, n, vec(thetahat), xhat, (x - xhat) ./ sqrt.(xhat), (x - xhat) ./ sqrt.(V))
+    PowerDivergenceTest(lambda, vec(theta0), stat, df, x, n, vec(thetahat), xhat, (x -
+    	xhat) ./ sqrt.(xhat), (x - xhat) ./ sqrt.(V))
 end
 
 #convenience functions
 
 #PDT
-function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}; lambda::U=1.0) where {T<:Integer,U<:AbstractFloat}
+function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T},
+	levels::Levels{T}; lambda::U=1.0, ddof::Integer=0) where {T<:Integer,U<:AbstractFloat}
     d = counts(x, y, levels)
-    PowerDivergenceTest(d, lambda=lambda)
+    PowerDivergenceTest(d, lambda=lambda, ddof=ddof)
 end
 
-function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; lambda::U=1.0) where {T<:Integer,U<:AbstractFloat}
+function PowerDivergenceTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T;
+	lambda::U=1.0, ddof::Integer=0) where {T<:Integer,U<:AbstractFloat}
     d = counts(x, y, k)
-    PowerDivergenceTest(d, lambda=lambda)
+    PowerDivergenceTest(d, lambda=lambda, ddof=ddof)
 end
 
-PowerDivergenceTest(x::AbstractVector{T}; lambda::U=1.0, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
-    PowerDivergenceTest(reshape(x, length(x), 1), lambda=lambda, theta0=theta0)
+PowerDivergenceTest(x::AbstractVector{T}; lambda::U=1.0, theta0::Vector{U} =
+	ones(length(x))/length(x), ddof::Integer=0) where {T<:Integer,U<:AbstractFloat} =
+	PowerDivergenceTest(reshape(x, length(x), 1), lambda=lambda, theta0=theta0,
+	ddof=ddof)
 
 #ChisqTest
 """
@@ -360,35 +391,40 @@ with ``λ = 1``).
 If `y` is not given and `x` is a matrix with one row or column, or `x` is a vector, then
 a goodness-of-fit test is performed (`x` is treated as a one-dimensional contingency
 table). In this case, the hypothesis tested is whether the population probabilities equal
-those in `theta0`, or are all equal if `theta0` is not given.
+those in `theta0`, or are all equal if `theta0` is not given. The `ddof` parameter is the 
+"delta degrees of freedom" adjustment to the number of degrees of freedom used for 
+calculation of p-values. The number of degrees of freedom is decreased by `ddof`.
 
 If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
-contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
-table is calculated using `counts` function from the `StatsBase` package. Then the power
-divergence test is conducted under the null hypothesis that the joint distribution of the
-cell counts in a 2-dimensional contingency table is the product of the row and column
-marginals.
+contingency table. Otherwise, `x` and `y` must be vectors of the same length. The
+contingency table is calculated using `counts` function from the `StatsBase` package. Then
+the power divergence test is conducted under the null hypothesis that the joint
+distribution of the cell counts in a 2-dimensional contingency table is the product of the
+row and column marginals.
 
 Note that the entries of `x` (and `y` if provided) must be non-negative integers.
 
 Implements: [`pvalue`](@ref), [`confint`](@ref)
 """
-function ChisqTest(x::AbstractMatrix{T}) where T<:Integer
-    PowerDivergenceTest(x, lambda=1.0)
+function ChisqTest(x::AbstractMatrix{T}; ddof::Integer=0) where T<:Integer
+    PowerDivergenceTest(x, lambda=1.0, ddof=ddof)
 end
 
-function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}) where T<:Integer
+function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T};
+	ddof::Integer=0) where T<:Integer
     d = counts(x, y, levels)
-    PowerDivergenceTest(d, lambda=1.0)
+    PowerDivergenceTest(d, lambda=1.0, ddof=ddof)
 end
 
-function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where T<:Integer
+function ChisqTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T; 
+    ddof::Integer=0) where T<:Integer
     d = counts(x, y, k)
-    PowerDivergenceTest(d, lambda=1.0)
+    PowerDivergenceTest(d, lambda=1.0, ddof=ddof)
 end
 
-ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
-    PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0)
+ChisqTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x);
+	ddof::Integer=0) where {T<:Integer,U<:AbstractFloat} =
+    PowerDivergenceTest(reshape(x, length(x), 1), lambda=1.0, theta0=theta0, ddof=ddof)
 
 #MultinomialLRTest
 """
@@ -403,11 +439,11 @@ table). In this case, the hypothesis tested is whether the population probabilit
 those in `theta0`, or are all equal if `theta0` is not given.
 
 If `x` is a matrix with at least two rows and columns, it is taken as a two-dimensional
-contingency table. Otherwise, `x` and `y` must be vectors of the same length. The contingency
-table is calculated using `counts` function from the `StatsBase` package. Then the power
-divergence test is conducted under the null hypothesis that the joint distribution of the
-cell counts in a 2-dimensional contingency table is the product of the row and column
-marginals.
+contingency table. Otherwise, `x` and `y` must be vectors of the same length. The
+contingency table is calculated using `counts` function from the `StatsBase` package. Then
+the power divergence test is conducted under the null hypothesis that the joint
+distribution of the cell counts in a 2-dimensional contingency table is the product of the
+row and column marginals.
 
 Note that the entries of `x` (and `y` if provided) must be non-negative integers.
 
@@ -417,17 +453,20 @@ function MultinomialLRTest(x::AbstractMatrix{T}) where T<:Integer
     PowerDivergenceTest(x, lambda=0.0)
 end
 
-function MultinomialLRTest(x::AbstractVector{T}, y::AbstractVector{T}, levels::Levels{T}) where T<:Integer
+function MultinomialLRTest(x::AbstractVector{T}, y::AbstractVector{T},
+	levels::Levels{T}) where T<:Integer
     d = counts(x, y, levels)
     PowerDivergenceTest(d, lambda=0.0)
 end
 
-function MultinomialLRTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where T<:Integer
+function MultinomialLRTest(x::AbstractVector{T}, y::AbstractVector{T}, k::T) where
+	T<:Integer
     d = counts(x, y, k)
     PowerDivergenceTest(d, lambda=0.0)
 end
 
-MultinomialLRTest(x::AbstractVector{T}, theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
+MultinomialLRTest(x::AbstractVector{T}, 
+    theta0::Vector{U} = ones(length(x))/length(x)) where {T<:Integer,U<:AbstractFloat} =
     PowerDivergenceTest(reshape(x, length(x), 1), lambda=0.0, theta0=theta0)
 
 function show_params(io::IO, x::PowerDivergenceTest, ident="")

diff --git a/test/power_divergence.jl b/test/power_divergence.jl
@@ -198,4 +198,17 @@ MultinomialLRTest(x,y,(1:3,1:3))
 d = [113997 1031298
      334453 37471]
 PowerDivergenceTest(d)
+
+# Test ddof for the ChisqTest
+x = [8,10,16,6]
+probs = [0.15865525393145702, 0.341344746068543, 0.34134474606854304, 0.15865525393145702]
+m = ChisqTest(x, probs, ddof=2)
+@test pvalue(m) ≈ 0.17603510054227095
+
+# Test ddof for the PowerDivergenceTest
+x = [8,10,16,6]
+probs = [0.15865525393145702, 0.341344746068543, 0.34134474606854304, 0.15865525393145702]
+m = PowerDivergenceTest(x, lambda=1.0, theta0=probs, ddof=2)
+@test pvalue(m) ≈ 0.17603510054227095
+
 end