From cd7b2b1077dd32bb94489622fba3781fad1aa939 Mon Sep 17 00:00:00 2001
From: Mousum Dutta <44145580+mousum-github@users.noreply.github.com>
Date: Thu, 4 May 2023 12:26:04 +0530
Subject: [PATCH] `QR` decomposition method in GLM.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

backport of #507

---------

Co-authored-by: Mousum
Co-authored-by: ayushpatnaikgit
Co-authored-by: Milan Bouchet-Valat
Co-authored-by: Ayush Patnaik
Co-authored-by: harsharora21
Co-authored-by: Bogumił Kamiński
(cherry picked from commit a8016bd7b755306b4854742a2915b470fa51ba02)
---
 docs/src/examples.md |  123 +++++++
 src/GLM.jl           |   33 ++
 src/glmfit.jl        |   17 +-
 src/linpred.jl       |  179 ++++++++--
 src/lm.jl            |   58 +++-
 src/negbinfit.jl     |   15 +-
 test/runtests.jl     |  779 +++++++++++++++++++++++++++++++------------
 7 files changed, 963 insertions(+), 241 deletions(-)

diff --git a/docs/src/examples.md b/docs/src/examples.md
index 338d01c0..edbfdacf 100644
--- a/docs/src/examples.md
+++ b/docs/src/examples.md
@@ -86,6 +86,129 @@ julia> round.(vcov(ols); digits=5)
   0.38889  -0.16667
  -0.16667   0.08333
 ```
+By default, the `lm` method uses the Cholesky factorization, which is fast but
+numerically unstable, especially for ill-conditioned design matrices. The Cholesky
+method also aggressively detects multicollinearity. You can use the `method` keyword
+argument to apply the more stable (but slower) QR factorization instead.
+
+```jldoctest
+julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]);
+
+julia> ols = lm(@formula(Y ~ X), data; method=:qr)
+LinearModel
+
+Y ~ 1 + X
+
+Coefficients:
+─────────────────────────────────────────────────────────────────────────
+                 Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
+─────────────────────────────────────────────────────────────────────────
+(Intercept)  -0.666667    0.62361   -1.07    0.4788   -8.59038    7.25704
+X             2.5         0.288675   8.66    0.0732   -1.16797    6.16797
+─────────────────────────────────────────────────────────────────────────
+```
+The following example shows that the QR decomposition works better for an
+ill-conditioned design matrix: the linear model fitted with the QR method is a better
+model than the one fitted with the Cholesky method, since its estimated loglikelihood
+is higher.
+Note that the condition number of the design matrix is quite high (≈ 3.52e7).
+
+```
+julia> X = [-0.4011512997627107 0.6368622664511552;
+            -0.0808472925693535 0.12835204623364604;
+            -0.16931095045225217 0.2687956795496601;
+            -0.4110745650568839 0.6526163576003452;
+            -0.4035951747670475 0.6407421349445884;
+            -0.4649907741370211 0.7382129928076485;
+            -0.15772708898883683 0.25040532268222715;
+            -0.38144358562952446 0.6055745630707645;
+            -0.1012787681395544 0.16078875117643368;
+            -0.2741403589052255 0.4352214984054432];
+
+julia> y = [4.362866166172215,
+            0.8792840060172619,
+            1.8414020451091684,
+            4.470790758717395,
+            4.3894454833815395,
+            5.0571760643993455,
+            1.7154177874916376,
+            4.148527704012107,
+            1.1014936742570425,
+            2.9815131910316097];
+
+julia> modelqr = lm(X, y; method=:qr)
+LinearModel
+
+Coefficients:
+────────────────────────────────────────────────────────────────
+       Coef.  Std. Error       t  Pr(>|t|)  Lower 95%  Upper 95%
+────────────────────────────────────────────────────────────────
+x1   5.00389   0.0560164   89.33    <1e-12    4.87472    5.13307
+x2  10.0025    0.035284   283.48    <1e-16    9.92109   10.0838
+────────────────────────────────────────────────────────────────
+
+julia> modelchol = lm(X, y; method=:cholesky)
+LinearModel
+
+Coefficients:
+────────────────────────────────────────────────────────────────
+       Coef.  Std. Error       t  Pr(>|t|)  Lower 95%  Upper 95%
+────────────────────────────────────────────────────────────────
+x1   5.17647   0.0849184   60.96    <1e-11    4.98065    5.37229
+x2  10.1112    0.053489   189.03    <1e-15    9.98781   10.2345
+────────────────────────────────────────────────────────────────
+
+julia> loglikelihood(modelqr) > loglikelihood(modelchol)
+true
+```
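+A quick way to verify the ill-conditioning claim is to compute the condition number of
+`X` directly. The following is a minimal sketch (not one of the doctests): it assumes
+the `X` matrix defined in the block above and uses `cond` from the standard
+`LinearAlgebra` library.
+
+```julia
+using LinearAlgebra
+
+# Ratio of the largest to the smallest singular value of X;
+# the text above reports roughly 3.52e7 for this matrix.
+κ = cond(X)
+
+# Rule of thumb: solving the normal equations (as Cholesky does) squares the
+# condition number, while QR factorizes X itself, so QR loses roughly half as
+# many digits of accuracy on badly conditioned problems.
+κ > 1e6  # true here
+```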
+Since the Cholesky method with `dropcollinear = true` aggressively detects multicollinearity,
+if you ever encounter multicollinearity in any GLM model fitted with Cholesky,
+it is worth trying the same model with the QR decomposition.
+The following example is taken from "Introductory Econometrics: A Modern Approach, 7e"
+by Jeffrey M. Wooldridge.
+The dataset is used to study the relationship between firm size, often measured by annual
+sales, and spending on research and development (R&D).
+The example shows that, for the given model,
+the Cholesky method detects multicollinearity in the design matrix with `dropcollinear=true`
+and hence, unlike QR, does not estimate all parameters.
+
+```jldoctest
+julia> y = [9.42190647125244, 2.084805727005, 3.9376676082611, 2.61976027488708, 4.04761934280395, 2.15384602546691,
+            2.66240668296813, 4.39475727081298, 5.74520826339721, 3.59616208076477, 1.54265284538269, 2.59368276596069,
+            1.80476510524749, 1.69270837306976, 3.04201245307922, 2.18389105796813, 2.73844122886657, 2.88134002685546,
+            2.46666669845581, 3.80616021156311, 5.12149810791015, 6.80378007888793, 3.73669862747192, 1.21332454681396,
+            2.54629635810852, 5.1612901687622, 1.86798071861267, 1.21465551853179, 6.31019830703735, 1.02669405937194,
+            2.50623273849487, 1.5936255455017];
+
+julia> x = [4570.2001953125, 2830, 596.799987792968, 133.600006103515, 42, 390, 93.9000015258789, 907.900024414062,
+            19773, 39709, 2936.5, 2513.80004882812, 1124.80004882812, 921.599975585937, 2432.60009765625, 6754,
+            1066.30004882812, 3199.89990234375, 150, 509.700012207031, 1452.69995117187, 8995, 1212.30004882812,
+            906.599975585937, 2592, 201.5, 2617.80004882812, 502.200012207031, 2824, 292.200012207031, 7621, 1631.5];
+
+julia> rdchem = DataFrame(rdintens=y, sales=x);
+
+julia> mdl = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:cholesky)
+LinearModel
+
+rdintens ~ 1 + sales + :(sales ^ 2)
+
+Coefficients:
+───────────────────────────────────────────────────────────────────────────────────────
+                    Coef.    Std. Error      t  Pr(>|t|)    Lower 95%     Upper 95%
+───────────────────────────────────────────────────────────────────────────────────────
+(Intercept)   0.0           NaN            NaN       NaN  NaN           NaN
+sales         0.000852509     0.000156784  5.44    <1e-05    0.000532313   0.00117271
+sales ^ 2    -1.97385e-8      4.56287e-9  -4.33    0.0002   -2.90571e-8   -1.04199e-8
+───────────────────────────────────────────────────────────────────────────────────────
+
+julia> mdl = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:qr)
+LinearModel
+
+rdintens ~ 1 + sales + :(sales ^ 2)
+
+Coefficients:
+─────────────────────────────────────────────────────────────────────────────────
+                    Coef.   Std. Error      t  Pr(>|t|)    Lower 95%    Upper 95%
+─────────────────────────────────────────────────────────────────────────────────
+(Intercept)   2.61251      0.429442      6.08    <1e-05   1.73421      3.49082
+sales         0.000300571  0.000139295   2.16    0.0394   1.56805e-5   0.000585462
+sales ^ 2    -6.94594e-9   3.72614e-9   -1.86    0.0725  -1.45667e-8   6.7487e-10
+─────────────────────────────────────────────────────────────────────────────────
+```
+
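+A dropped (collinear) predictor shows up as a `0.0` coefficient with `NaN` statistics,
+as described for `dropcollinear` above, so the two fits can also be compared
+programmatically. This is a minimal sketch, assuming the `rdchem` data frame from the
+example; the internal helper `GLM.linpred_rank` used by this patch's tests serves the
+same purpose, but the public `stderror` route below is preferable.
+
+```julia
+chol_fit = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:cholesky)
+qr_fit   = lm(@formula(rdintens ~ sales + sales^2), rdchem; method=:qr)
+
+# NaN standard errors flag coefficients that were dropped as collinear.
+any(isnan, stderror(chol_fit))  # true: the intercept was dropped
+any(isnan, stderror(qr_fit))    # false: all three parameters estimated
+```
+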
 ## Probit regression
 
 ```jldoctest
diff --git a/src/GLM.jl b/src/GLM.jl
index 4b83d5dc..9d051562 100644
--- a/src/GLM.jl
+++ b/src/GLM.jl
@@ -89,6 +89,39 @@ module GLM
         pivoted_cholesky!(A; kwargs...) = cholesky!(A, RowMaximum(); kwargs...)
     end
 
+    @static if VERSION < v"1.7.0"
+        pivoted_qr!(A; kwargs...) = qr!(A, Val(true); kwargs...)
+    else
+        pivoted_qr!(A; kwargs...) = qr!(A, ColumnNorm(); kwargs...)
+    end
+
+    const COMMON_FIT_KWARGS_DOCS = """
+        - `dropcollinear::Bool=true`: Controls whether or not a model matrix of
+          less than full rank is accepted.
+          If `true` (the default), the coefficients for redundant linearly dependent
+          columns are set to `0.0` and all associated statistics to `NaN`.
+          Typically, from a set of linearly dependent columns the last ones are
+          identified as redundant (however, the exact selection of redundant columns
+          is not guaranteed).
+        - `method::Symbol=:cholesky`: Controls which decomposition method to use.
+          If `method=:cholesky` (the default), then the `Cholesky` decomposition method
+          will be used. If `method=:qr`, then the `QR` decomposition method (which is
+          more stable but slower) will be used.
+        - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
+          Such weights are equivalent to repeating each observation a number of times equal
+          to its weight. Note that this interpretation gives the same point estimates as,
+          but different standard errors from, analytical (a.k.a. inverse variance) weights
+          and probability (a.k.a. sampling) weights, which are the default in some other
+          software.
+          Can be length 0 to indicate no weighting (default).
+        - `contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()`: a `Dict` mapping term names
+          (as `Symbol`s) to term types (e.g. `ContinuousTerm`) or contrasts
+          (e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`,
+          etc.). If contrasts are not provided for a variable, the appropriate
+          term type will be guessed based on the data type from the data column:
+          any numeric data is assumed to be continuous, and any non-numeric data
+          is assumed to be categorical (with `DummyCoding()` as the default contrast type).
+        """
+
     include("linpred.jl")
     include("lm.jl")
     include("glmtools.jl")
diff --git a/src/glmfit.jl b/src/glmfit.jl
index 33f744e2..15faefbd 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -283,7 +283,8 @@ function nulldeviance(m::GeneralizedLinearModel)
         X = fill(1.0, length(y), hasint ? 1 : 0)
         nullm = fit(GeneralizedLinearModel,
                     X, y, d, r.link; wts=wts, offset=offset,
-                    dropcollinear=isa(m.pp.chol, CholeskyPivoted),
+                    dropcollinear=ispivoted(m.pp),
+                    method=decomposition_method(m.pp),
                     maxiter=m.maxiter, minstepfac=m.minstepfac,
                     atol=m.atol, rtol=m.rtol)
         dev = deviance(nullm)
@@ -338,7 +339,8 @@ function nullloglikelihood(m::GeneralizedLinearModel)
         X = fill(1.0, length(y), hasint ?
1 : 0) nullm = fit(GeneralizedLinearModel, X, y, d, r.link; wts=wts, offset=offset, - dropcollinear=isa(m.pp.chol, CholeskyPivoted), + dropcollinear=ispivoted(m.pp), + method=decomposition_method(m.pp), maxiter=m.maxiter, minstepfac=m.minstepfac, atol=m.atol, rtol=m.rtol) ll = loglikelihood(nullm) @@ -569,6 +571,7 @@ function fit(::Type{M}, d::UnivariateDistribution, l::Link = canonicallink(d); dropcollinear::Bool = true, + method::Symbol = :cholesky, dofit::Bool = true, wts::AbstractVector{<:Real} = similar(y, 0), offset::AbstractVector{<:Real} = similar(y, 0), @@ -580,7 +583,15 @@ function fit(::Type{M}, end rr = GlmResp(y, d, l, offset, wts) - res = M(rr, cholpred(X, dropcollinear), false) + + if method === :cholesky + res = M(rr, cholpred(X, dropcollinear), false) + elseif method === :qr + res = M(rr, qrpred(X, dropcollinear), false) + else + throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) + end + return dofit ? fit!(res; fitargs...) : res end diff --git a/src/linpred.jl b/src/linpred.jl index 770e4062..25425c78 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -35,7 +35,7 @@ end """ DensePredQR -A `LinPred` type with a dense, unpivoted QR decomposition of `X` +A `LinPred` type with a dense QR decomposition of `X` # Members @@ -43,28 +43,32 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `beta0`: base coefficient vector of length `p` - `delbeta`: increment to coefficient vector, also of length `p` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method -- `qr`: a `QRCompactWY` object created from `X`, with optional row weights. +- `qr`: either a `QRCompactWY` or `QRPivoted` object created from `X`, with optional row weights. +- `scratchm1`: scratch Matrix{T} of the same size as `X` """ -mutable struct DensePredQR{T<:BlasReal} <: DensePred +mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY, QRPivoted}} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} - qr::QRCompactWY{T} - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T - n, p = size(X) - length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X)) - end - function DensePredQR{T}(X::Matrix{T}) where T + qr::Q + scratchm1::Matrix{T} + + function DensePredQR(X::AbstractMatrix, pivot::Bool=false) n, p = size(X) - new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X)) + T = typeof(float(zero(eltype(X)))) + Q = pivot ? QRPivoted : QRCompactWY + fX = float(X) + cfX = fX === X ? copy(fX) : fX + F = pivot ? pivoted_qr!(cfX) : qr!(cfX) + new{T,Q}(Matrix{T}(X), + zeros(T, p), + zeros(T, p), + zeros(T, p), + F, + similar(X, T)) end end -DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0) -DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) -convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) - """ delbeta!(p::LinPred, r::Vector) @@ -72,8 +76,71 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res """ function delbeta! 
end -function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) p.delbeta = p.qr\r + mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) + return p +end + +function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) + X = p.X + W = Diagonal(wt) + sqrtW = Diagonal(sqrt.(wt)) + mul!(p.scratchm1, sqrtW, X) + mul!(p.delbeta, X'W, r) + qnr = qr(p.scratchm1) + Rinv = inv(qnr.R) + p.delbeta = Rinv * Rinv' * p.delbeta + return p +end + +function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + if rnk == length(p.delbeta) + p.delbeta = p.qr\r + else + R = @view p.qr.R[:, 1:rnk] + Q = @view p.qr.Q[:, 1:size(R, 1)] + piv = p.qr.p + p.delbeta = zeros(size(p.delbeta)) + p.delbeta[1:rnk] = R \ Q'r + invpermute!(p.delbeta, piv) + end + mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) + return p +end + +function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + rnk = rank(p.qr.R) + X = p.X + W = Diagonal(wt) + sqrtW = Diagonal(sqrt.(wt)) + delbeta = p.delbeta + scratchm2 = similar(X, T) + mul!(p.scratchm1, sqrtW, X) + mul!(scratchm2, W, X) + mul!(delbeta, transpose(scratchm2), r) + + if rnk == length(p.delbeta) + qnr = qr(p.scratchm1) + Rinv = inv(qnr.R) + p.delbeta = Rinv * Rinv' * delbeta + else + qnr = pivoted_qr!(copy(p.scratchm1)) + R = @view qnr.R[1:rnk, 1:rnk] + Rinv = inv(R) + piv = qnr.p + permute!(delbeta, piv) + for k=(rnk+1):length(delbeta) + delbeta[k] = -zero(T) + end + p.delbeta[1:rnk] = Rinv * Rinv' * view(delbeta, 1:rnk) + invpermute!(delbeta, piv) + end return p end @@ -115,6 +182,7 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool) end cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) +qrpred(X::AbstractMatrix, pivot::Bool=false) = DensePredQR(X, pivot) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -124,7 +192,6 @@ function cholesky(p::DensePredChol{T}) where T<:FP c = p.chol Cholesky(copy(cholfactors(c)), c.uplo, c.info) end -cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) @@ -227,7 +294,33 @@ end LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol +function invqr(x::DensePredQR{T,<: QRCompactWY}) where T + Q,R = qr(x.scratchm1) + Rinv = inv(R) + Rinv*Rinv' +end + +function invqr(x::DensePredQR{T,<: QRPivoted}) where T + Q,R,pv = pivoted_qr!(copy(x.scratchm1)) + rnk = rank(R) + p = length(x.delbeta) + if rnk == p + Rinv = inv(R) + xinv = Rinv*Rinv' + ipiv = invperm(pv) + return xinv[ipiv, ipiv] + else + Rsub = R[1:rnk, 1:rnk] + RsubInv = inv(Rsub) + xinv = fill(convert(T, NaN), (p,p)) + xinv[1:rnk, 1:rnk] = RsubInv*RsubInv' + ipiv = invperm(pv) + return xinv[ipiv, ipiv] + end +end + invchol(x::DensePred) = inv(cholesky!(x)) + function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T ch = x.chol rnk = rank(ch) @@ -242,8 +335,14 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T ipiv = invperm(ch.p) res[ipiv, ipiv] end + 
invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) -vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) + +inverse(x::DensePred) = invchol(x) +inverse(x::DensePredQR) = invqr(x) +inverse(x::SparsePredChol) = invchol(x) + +vcov(x::LinPredModel) = rmul!(inverse(x.pp), dispersion(x, true)) function cor(x::LinPredModel) Σ = vcov(x) @@ -289,4 +388,46 @@ dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) linpred_rank(x::LinPred) = length(x.beta0) -linpred_rank(x::DensePredChol{<:Any, <:CholeskyPivoted}) = x.chol.rank +linpred_rank(x::DensePredChol{<:Any, <:CholeskyPivoted}) = rank(x.chol) +linpred_rank(x::DensePredChol{<:Any, <:Cholesky}) = rank(x.chol.U) +linpred_rank(x::DensePredQR{<:Any,<:QRPivoted}) = rank(x.qr.R) + +ispivoted(x::LinPred) = false +ispivoted(x::DensePredChol{<:Any, <:CholeskyPivoted}) = true +ispivoted(x::DensePredQR{<:Any,<:QRPivoted}) = true + +decomposition_method(x::LinPred) = isa(x, DensePredQR) ? :qr : :cholesky + +_coltype(::ContinuousTerm{T}) where {T} = T + +# Function common to all LinPred models, but documented separately +# for LinearModel and GeneralizedLinearModel +function StatsBase.predict(mm::LinPredModel, data; + interval::Union{Symbol,Nothing}=nothing, + kwargs...) + Tables.istable(data) || + throw(ArgumentError("expected data in a Table, got $(typeof(data))")) + + f = formula(mm) + t = Tables.columntable(data) + cols, nonmissings = StatsModels.missing_omit(t, f.rhs) + newx = modelcols(f.rhs, cols) + prediction = Tables.allocatecolumn(Union{_coltype(f.lhs), Missing}, length(nonmissings)) + fill!(prediction, missing) + if interval === nothing + predict!(view(prediction, nonmissings), mm, newx; + interval=interval, kwargs...) + return prediction + else + # Finding integer indices once is faster + nonmissinginds = findall(nonmissings) + lower = Vector{Union{Float64, Missing}}(missing, length(nonmissings)) + upper = Vector{Union{Float64, Missing}}(missing, length(nonmissings)) + tup = (prediction=view(prediction, nonmissinginds), + lower=view(lower, nonmissinginds), + upper=view(upper, nonmissinginds)) + predict!(tup, mm, newx; + interval=interval, kwargs...) + return (prediction=prediction, lower=lower, upper=upper) + end +end diff --git a/src/lm.jl b/src/lm.jl index 2ee28022..52841691 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -122,10 +122,11 @@ const FIT_LM_DOC = """ """ """ - fit(LinearModel, formula, data, allowrankdeficient=false; - [wts::AbstractVector], dropcollinear::Bool=true) + fit(LinearModel, formula::FormulaTerm, data; + [wts::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky, + contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()) fit(LinearModel, X::AbstractMatrix, y::AbstractVector; - wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true) + wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky) Fit a linear model to data. @@ -134,23 +135,30 @@ $FIT_LM_DOC function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, allowrankdeficient_dep::Union{Bool,Nothing}=nothing; wts::AbstractVector{<:Real}=similar(y, 0), - dropcollinear::Bool=true) + dropcollinear::Bool=true, + method::Symbol=:cholesky) if allowrankdeficient_dep !== nothing @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. 
Proceeding with positional argument value: $allowrankdeficient_dep"
        dropcollinear = allowrankdeficient_dep
    end
-    fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear)))
+    if method === :cholesky
+        fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear)))
+    elseif method === :qr
+        fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear)))
+    else
+        throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`."))
+    end
 end
 
 """
     lm(formula, data, allowrankdeficient=false;
-       [wts::AbstractVector], dropcollinear::Bool=true)
+       [wts::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky)
     lm(X::AbstractMatrix, y::AbstractVector;
-       wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true)
+       wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky)
 
 Fit a linear model to data.
-An alias for `fit(LinearModel, X, y; wts=wts, dropcollinear=dropcollinear)`
+An alias for `fit(LinearModel, X, y; wts=wts, dropcollinear=dropcollinear, method=method)`
 
 $FIT_LM_DOC
 """
@@ -255,12 +263,42 @@ function predict(mm::LinearModel, newx::AbstractMatrix;
         interval = :confidence
     end
     if interval === nothing
-        return retmean
+        res isa AbstractVector ||
+            throw(ArgumentError("`res` must be a vector when `interval == nothing` or is omitted"))
+        length(res) == size(newx, 1) ||
+            throw(DimensionMismatch("length of `res` must equal the number of rows in `newx`"))
+        res .= newx * coef(mm)
-    elseif mm.pp.chol isa CholeskyPivoted &&
+    elseif mm.pp isa DensePredChol &&
+           mm.pp.chol isa CholeskyPivoted &&
            mm.pp.chol.rank < size(mm.pp.chol, 2)
+        throw(ArgumentError("prediction intervals are currently not implemented " *
+                            "when some independent variables have been dropped " *
+                            "from the model due to collinearity"))
+    elseif mm.pp isa DensePredQR && rank(mm.pp.qr.R) < size(mm.pp.qr.R, 2)
         throw(ArgumentError("prediction intervals are currently not implemented " *
                             "when some independent variables have been dropped " *
                             "from the model due to collinearity"))
+    else
+        res isa NamedTuple ||
+            throw(ArgumentError("`res` must be a `NamedTuple` when `interval` is " *
+                                "`:confidence` or `:prediction`"))
+        prediction, lower, upper = res
+        length(prediction) == length(lower) == length(upper) == size(newx, 1) ||
+            throw(DimensionMismatch("length of vectors in `res` must equal the number of rows in `newx`"))
+        length(mm.rr.wts) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression")
+
+        dev = deviance(mm)
+        dofr = dof_residual(mm)
+        ret = diag(newx*vcov(mm)*newx')
+        if interval == :prediction
+            ret .+= dev/dofr
+        elseif interval != :confidence
+            error("only :confidence and :prediction intervals are defined")
+        end
+        ret .= quantile(TDist(dofr), (1 - level)/2) .* sqrt.(ret)
+        prediction .= newx * coef(mm)
+        lower .= prediction .+ ret
+        upper .= prediction .- ret
     end
     length(mm.rr.wts) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression")
     chol = cholesky!(mm.pp)
diff --git a/src/negbinfit.jl b/src/negbinfit.jl
index 7c36127c..2670fb42 100644
--- a/src/negbinfit.jl
+++ b/src/negbinfit.jl
@@ -60,6 +60,8 @@ In both cases, `link` may specify the link function
 
 # Keyword Arguments
 - `initialθ::Real=Inf`: Starting value for shape parameter θ. If it is `Inf`
   then the initial value will be estimated by fitting a Poisson distribution.
+- `dropcollinear::Bool=true`: See `dropcollinear` for [`glm`](@ref) +- `method::Symbol=:cholesky`: See `method` for [`glm`](@ref) - `maxiter::Integer=30`: See `maxiter` for [`glm`](@ref) - `atol::Real=1.0e-6`: See `atol` for [`glm`](@ref) - `rtol::Real=1.0e-6`: See `rtol` for [`glm`](@ref) @@ -69,6 +71,8 @@ function negbin(F, D, args...; initialθ::Real=Inf, + dropcollinear::Bool=true, + method::Symbol=:cholesky, maxiter::Integer=30, minstepfac::Real=0.001, atol::Real=1e-6, @@ -103,10 +107,12 @@ function negbin(F, # fit a Poisson regression model if the user does not specify an initial θ if isinf(initialθ) regmodel = glm(F, D, Poisson(), args...; - maxiter=maxiter, atol=atol, rtol=rtol, verbose=verbose, kwargs...) + dropcollinear=dropcollinear, method=method, maxiter=maxiter, + atol=atol, rtol=rtol, verbose=verbose, kwargs...) else regmodel = glm(F, D, NegativeBinomial(initialθ), args...; - maxiter=maxiter, atol=atol, rtol=rtol, verbose=verbose, kwargs...) + dropcollinear=dropcollinear, method=method, maxiter=maxiter, + atol=atol, rtol=rtol, verbose=verbose, kwargs...) end μ = regmodel.model.rr.mu @@ -131,8 +137,9 @@ function negbin(F, end verbose && println("[ Alternating iteration ", i, ", θ = ", θ, " ]") regmodel = glm(F, D, NegativeBinomial(θ), args...; - maxiter=maxiter, atol=atol, rtol=rtol, verbose=verbose, kwargs...) - μ = regmodel.model.rr.mu + dropcollinear=dropcollinear, method=method, maxiter=maxiter, + atol=atol, rtol=rtol, verbose=verbose, kwargs...) + μ = regmodel.rr.mu prevθ = θ θ = mle_for_θ(y, μ, wts; maxiter=maxiter, tol=rtol) δ = prevθ - θ diff --git a/test/runtests.jl b/test/runtests.jl index f55bb0c0..729f0e4b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,12 +21,14 @@ end linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y -@testset "lm" begin - lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form) - test_show(lm1) - @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen)) +@testset "LM with $dmethod" for dmethod in (:cholesky, :qr) Σ = [6.136653061224592e-05 -9.464489795918525e-05 -9.464489795918525e-05 1.831836734693908e-04] + + lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form; method=dmethod) + test_show(lm1) + @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen)) + @test isapprox(vcov(lm1), Σ) @test isapprox(cor(lm1.model), Diagonal(diag(Σ))^(-1/2)*Σ*Diagonal(diag(Σ))^(-1/2)) @test dof(lm1) == 3 @@ -41,16 +43,22 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y @test isapprox(aic(lm1), -36.409684288095946) @test isapprox(aicc(lm1), -24.409684288095946) @test isapprox(bic(lm1), -37.03440588041178) - lm2 = fit(LinearModel, hcat(ones(6), 10form.Carb), form.OptDen, true) - @test isa(lm2.pp.chol, CholeskyPivoted) - @test lm2.pp.chol.piv == [2, 1] + lm2 = fit(LinearModel, hcat(ones(6), 10form.Carb), form.OptDen, true; method=dmethod) + if dmethod == :cholesky + @test isa(lm2.pp.chol, CholeskyPivoted) + piv = lm2.pp.chol.piv + elseif dmethod == :qr + @test isa(lm2.pp.qr, QRPivoted) + piv = lm2.pp.qr.p + end + @test piv == [2, 1] @test isapprox(coef(lm1), coef(lm2) .* [1., 10.]) lm3 = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), contrasts=Dict(:x=>DummyCoding())) lm4 = lm(@formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5)))) @test coef(lm3) == coef(lm4) ≈ [11, 1, 2, 3, 4] end -@testset "Linear Model Cook's Distance" begin +@testset "Cook's Distance in Linear Model with $dmethod" for dmethod in (:cholesky, :qr) st_df = DataFrame( Y=[6.4, 7.4, 10.4, 15.1, 12.3 , 11.4], XA=[1.5, 6.5, 11.5, 19.9, 17.0, 15.5], 
@@ -63,32 +71,32 @@ end
     )
 
     # linear regression
-    t_lm_base = lm(@formula(Y ~ XA), st_df)
+    t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod)
     @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base))
 
     # linear regression, no intercept
-    t_lm_noint = lm(@formula(Y ~ XA +0), st_df)
+    t_lm_noint = lm(@formula(Y ~ XA +0), st_df; method=dmethod)
     @test isapprox(st_df.CooksD_noint, cooksdistance(t_lm_noint))
 
     # linear regression, two collinear variables (Variance inflation factor ≊ 250)
-    t_lm_multi = lm(@formula(Y ~ XA + XB), st_df)
+    t_lm_multi = lm(@formula(Y ~ XA + XB), st_df; method=dmethod)
     @test isapprox(st_df.CooksD_multi, cooksdistance(t_lm_multi))
 
     # linear regression, two full collinear variables (XC = 2 XA) hence should get the same results as the original
     # after pivoting
-    t_lm_colli = lm(@formula(Y ~ XA + XC), st_df, dropcollinear=true)
+    t_lm_colli = lm(@formula(Y ~ XA + XC), st_df; dropcollinear=true, method=dmethod)
     # Currently fails as the collinear variable is not dropped from `modelmatrix(obj)`
     @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli))
-
 end
 
-@testset "linear model with weights" begin
+@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr)
     df = dataset("quantreg", "engel")
     N = nrow(df)
     df.weights = repeat(1:5, Int(N/5))
     f = @formula(FoodExp ~ Income)
-    lm_model = lm(f, df, wts = df.weights)
-    glm_model = glm(f, df, Normal(), wts = df.weights)
+
+    lm_model = lm(f, df, wts = df.weights; method=dmethod)
+    glm_model = glm(f, df, Normal(); wts = df.weights, method=dmethod)
     @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
     @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
     @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
@@ -100,10 +108,29 @@ end
     @test isapprox(loglikelihood(lm_model), -4353.946729075838)
     @test isapprox(loglikelihood(glm_model), -4353.946729075838)
     @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
-    @test isapprox(mean(residuals(lm_model)), -5.412966629787718)
+    @test isapprox(mean(residuals(lm_model)), -5.412966629787718)
+end
+
+@testset "Linear model with dropcollinearity and $dmethod" for dmethod in (:cholesky, :qr)
+    # for a full-rank design matrix, both should give the same results
+    lm1 = lm(@formula(OptDen ~ Carb), form; method=dmethod, dropcollinear=true)
+    lm2 = lm(@formula(OptDen ~ Carb), form; method=dmethod, dropcollinear=false)
+    @test coef(lm1) ≈ coef(lm2)
+    @test stderror(lm1) ≈ stderror(lm2)
+    @test r2(lm1) ≈ r2(lm2)
+    @test adjr2(lm1) ≈ adjr2(lm2)
+    @test vcov(lm1) ≈ vcov(lm2)
+    @test predict(lm1) ≈ predict(lm2)
+    @test loglikelihood(lm1) ≈ loglikelihood(lm2)
+    @test nullloglikelihood(lm1) ≈ nullloglikelihood(lm2)
+    @test residuals(lm1) ≈ residuals(lm2)
+    @test aic(lm1) ≈ aic(lm2)
+    @test aicc(lm1) ≈ aicc(lm2)
+    @test bic(lm1) ≈ bic(lm2)
+    @test GLM.dispersion(lm1.model) ≈ GLM.dispersion(lm2.model)
 end
 
-@testset "rankdeficient" begin
+@testset "Linear model with $dmethod and rank deficiency" for dmethod in (:cholesky, :qr)
     rng = StableRNG(1234321)
     # an example of rank deficiency caused by a missing cell in a table
     dfrm = DataFrame([categorical(repeat(string.('A':'D'), inner = 6)),
     X = ModelMatrix(ModelFrame(f, dfrm)).m
     y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1))
     inds = deleteat!(collect(1:length(y)), 7:8)
-    m1 = fit(LinearModel, X, y)
+
+    m1 = fit(LinearModel, X, y; method=dmethod)
     @test isapprox(deviance(m1), 0.12160301538297297)
     Xmissingcell = X[inds, :]
ymissingcell = y[inds] - @test_throws PosDefException m2 = fit(LinearModel, Xmissingcell, ymissingcell; dropcollinear=false) - m2p = fit(LinearModel, Xmissingcell, ymissingcell) - @test isa(m2p.pp.chol, CholeskyPivoted) - @test rank(m2p.pp.chol) == 11 - @test isapprox(deviance(m2p), 0.1215758392280204) - @test isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, - 3.9661379199401257, 5.079410103608552, 6.1944618141188625, 0.0, 7.930328728005131, - 8.879994918604757, 2.986388408421915, 10.84972230524356, 11.844809275711485]) - @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:]) - - m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true) + m2p = fit(LinearModel, Xmissingcell, ymissingcell; method=dmethod) + m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true; method=dmethod) @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * - "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) - @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) - @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol) + "argument `dropcollinear` instead. Proceeding with positional argument value: true") + m2p_dep_pos_kw = fit(LinearModel, Xmissingcell, ymissingcell, true; method=dmethod, dropcollinear = false) + + if dmethod == :cholesky + @test_throws PosDefException m2 = fit(LinearModel, Xmissingcell, ymissingcell; + method = dmethod, dropcollinear=false) + @test isa(m2p.pp.chol, CholeskyPivoted) + @test isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, + 3.9661379199401257, 5.079410103608552, 6.1944618141188625, + 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, + 10.84972230524356, 11.844809275711485]) + + @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) + @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted) + elseif dmethod == :qr + @test_throws RankDeficientException m2 = fit(LinearModel, Xmissingcell, ymissingcell; + method = dmethod, dropcollinear=false) + @test isapprox(coef(m2p), [0.9772643585228962, 11.889730016918342, 3.027347397503282, + 3.9661379199401177, 5.079410103608539, 6.194461814118862, + -2.9863884084219015, 7.930328728005132, 8.87999491860477, + 0.0, 10.849722305243555, 11.844809275711487]) || + isapprox(coef(m2p), [0.9772643585228885, 8.903341608496437, 3.027347397503281, + 3.9661379199401257, 5.079410103608552, 6.1944618141188625, + 0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, + 10.84972230524356, 11.844809275711485]) + + @test isa(m2p.pp.qr, QRPivoted) + + @test isa(m2p_dep_pos.pp.qr, QRPivoted) + @test isa(m2p_dep_pos_kw.pp.qr, QRPivoted) + end + + indx = findfirst(item -> item == 0.0, coef(m2p)) + @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[indx,:]) + + @test GLM.linpred_rank(m2p.pp) == 11 + @test isapprox(deviance(m2p), 0.1215758392280204) + + @test GLM.linpred_rank(m2p_dep_pos.pp) == GLM.linpred_rank(m2p.pp) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @test isapprox(coef(m2p_dep_pos), coef(m2p)) - m2p_dep_pos_kw = fit(LinearModel, Xmissingcell, ymissingcell, true; dropcollinear = false) - @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted) - @test rank(m2p_dep_pos_kw.pp.chol) == rank(m2p.pp.chol) + @test GLM.linpred_rank(m2p_dep_pos_kw.pp) == GLM.linpred_rank(m2p.pp) @test isapprox(deviance(m2p_dep_pos_kw), deviance(m2p)) @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end -@testset "saturated linear model" begin - df = DataFrame(x=["a", "b", "c"], 
y=[1, 2, 3]) - model = lm(@formula(y ~ x), df) +@testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr) + df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) + + model = lm(@formula(y ~ x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 1, 2] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) - model = lm(@formula(y ~ 0 + x), df) + model = lm(@formula(y ~ 0 + x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 2, 3] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) - model = glm(@formula(y ~ x), df, Normal(), IdentityLink()) + model = glm(@formula(y ~ x), df1, Normal(), IdentityLink(); method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 1, 2] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) - model = glm(@formula(y ~ 0 + x), df, Normal(), IdentityLink()) + model = glm(@formula(y ~ 0 + x), df1, Normal(), IdentityLink(); method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 2, 3] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf]) # Saturated and rank-deficient model - df = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3]) - for model in (lm(@formula(y ~ x1 + x2), df), - glm(@formula(y ~ x1 + x2), df, Normal(), IdentityLink())) + df2 = DataFrame(x1=["a", "b", "c"], x2=["a", "b", "c"], y=[1, 2, 3]) + for model in (lm(@formula(y ~ x1 + x2), df2; method=dmethod), + glm(@formula(y ~ x1 + x2), df2, Normal(), IdentityLink(); method=dmethod)) ct = coeftable(model) @test dof_residual(model) == 0 @test dof(model) == 4 @test isinf(GLM.dispersion(model.model)) @test coef(model) ≈ [1, 1, 2, 0, 0] @test isequal(hcat(ct.cols[2:end]...), - [Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - Inf 0.0 1.0 -Inf Inf - NaN NaN NaN NaN NaN - NaN NaN NaN NaN NaN]) + [Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + Inf 0.0 1.0 -Inf Inf + NaN NaN NaN NaN NaN + NaN NaN NaN NaN NaN]) end end -@testset "Linear model with no intercept" begin +@testset "Linear model without intercept and $dmethod" for dmethod in (:cholesky, :qr) @testset "Test with NoInt1 Dataset" begin # test case to test r2 for no intercept model # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat data = DataFrame(x = 60:70, y = 130:140) - mdl = lm(@formula(y ~ 0 + x), data) + + mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky) @test coef(mdl) ≈ [2.07438016528926] @test stderror(mdl) ≈ [0.165289256198347E-01] @test GLM.dispersion(mdl.model) ≈ 3.56753034006338 @@ -235,7 +290,8 @@ end # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat data = DataFrame(x = [4, 5, 6], y = [3, 4, 4]) - mdl = lm(@formula(y ~ 0 + x), data) + + mdl = lm(@formula(y ~ 
0 + x), data; method=:cholesky) @test coef(mdl) ≈ [0.727272727272727] @test stderror(mdl) ≈ [0.420827318078432E-01] @test GLM.dispersion(mdl.model) ≈ 0.369274472937998 @@ -248,14 +304,15 @@ end @test aic(mdl) ≈ 5.3199453808329 @test loglikelihood(mdl) ≈ -0.6599726904164597 @test nullloglikelihood(mdl) ≈ -8.179255266668315 - @test predict(mdl) ≈ [2.909090909090908, 3.636363636363635, 4.363636363636362] + @test predict(mdl) ≈ [2.909090909090908, 3.636363636363635, + 4.363636363636362] end @testset "Test with without formula" begin X = [4 5 6]' y = [3, 4, 4] - data = DataFrame(x = [4, 5, 6], y = [3, 4, 4]) - mdl1 = lm(@formula(y ~ 0 + x), data) + + mdl1 = lm(@formula(y ~ 0 + x), data; method=dmethod) mdl2 = lm(X, y) @test coef(mdl1) ≈ coef(mdl2) @@ -274,14 +331,25 @@ end end end +@testset "Linear model with QR method and NASTY data" begin + x = [1, 2, 3, 4, 5, 6, 7, 8, 9] + nasty = DataFrame(X = x, TINY = 1.0E-12*x) + mdl = lm(@formula(X ~ TINY), nasty; method=:qr) + + @test coef(mdl) ≈ [0, 1.0E+12] + @test dof(mdl) ≈ 3 + @test r2(mdl) ≈ 1.0 + @test adjr2(mdl) ≈ 1.0 +end + dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], Outcome = categorical(repeat(string.('A':'C'), outer = 3)), Treatment = categorical(repeat(string.('a':'c'), inner = 3))) -@testset "Poisson GLM" begin +@testset "Poisson GLM with $dmethod" for dmethod in (:cholesky, :qr) gm1 = fit(GeneralizedLinearModel, @formula(Counts ~ 1 + Outcome + Treatment), - dobson, Poisson()) - @test GLM.cancancel(gm1.model.rr) + dobson, Poisson(); method=dmethod) + @test GLM.cancancel(gm1.rr) test_show(gm1) @test dof(gm1) == 5 @test isapprox(deviance(gm1), 5.12914107700115, rtol = 1e-7) @@ -300,25 +368,28 @@ admit = CSV.read(joinpath(glm_datadir, "admit.csv"), DataFrame) admit.rank = categorical(admit.rank) @testset "$distr with LogitLink" for distr in (Binomial, Bernoulli) - gm2 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + gre + gpa + rank), admit, distr()) - @test GLM.cancancel(gm2.model.rr) - test_show(gm2) - @test dof(gm2) == 6 - @test deviance(gm2) ≈ 458.5174924758994 - @test nulldeviance(gm2) ≈ 499.9765175549154 - @test loglikelihood(gm2) ≈ -229.25874623794968 - @test nullloglikelihood(gm2) ≈ -249.9882587774585 - @test isapprox(aic(gm2), 470.51749247589936) - @test isapprox(aicc(gm2), 470.7312329339146) - @test isapprox(bic(gm2), 494.4662797585473) - @test isapprox(coef(gm2), - [-3.9899786606380756, 0.0022644256521549004, 0.804037453515578, - -0.6754428594116578, -1.340203811748108, -1.5514636444657495]) + for dmethod in (:cholesky, :qr) + gm2 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + gre + gpa + rank), admit, distr(); + method=dmethod) + @test GLM.cancancel(gm2.rr) + test_show(gm2) + @test dof(gm2) == 6 + @test deviance(gm2) ≈ 458.5174924758994 + @test nulldeviance(gm2) ≈ 499.9765175549154 + @test loglikelihood(gm2) ≈ -229.25874623794968 + @test nullloglikelihood(gm2) ≈ -249.9882587774585 + @test isapprox(aic(gm2), 470.51749247589936) + @test isapprox(aicc(gm2), 470.7312329339146) + @test isapprox(bic(gm2), 494.4662797585473) + @test isapprox(coef(gm2), + [-3.9899786606380756, 0.0022644256521549004, 0.804037453515578, + -0.6754428594116578, -1.340203811748108, -1.5514636444657495]) + end end -@testset "Bernoulli ProbitLink" begin +@testset "Bernoulli ProbitLink with $dmethod" for dmethod in (:cholesky, :qr) gm3 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + gre + gpa + rank), admit, - Binomial(), ProbitLink()) + Binomial(), ProbitLink(); method=dmethod) test_show(gm3) @test !GLM.cancancel(gm3.model.rr) @test 
dof(gm3) == 6 @@ -334,10 +405,10 @@ end -0.4154125854823675, -0.8121458010130354, -0.9359047862425297]) end -@testset "Bernoulli CauchitLink" begin +@testset "Bernoulli CauchitLink with $dmethod" for dmethod in (:cholesky, :qr) gm4 = fit(GeneralizedLinearModel, @formula(admit ~ gre + gpa + rank), admit, - Binomial(), CauchitLink()) - @test !GLM.cancancel(gm4.model.rr) + Binomial(), CauchitLink(); method=dmethod) + @test !GLM.cancancel(gm4.rr) test_show(gm4) @test dof(gm4) == 6 @test isapprox(deviance(gm4), 459.3401112751141) @@ -349,10 +420,10 @@ end @test isapprox(bic(gm4), 495.28889855776214) end -@testset "Bernoulli CloglogLink" begin +@testset "Bernoulli CloglogLink with $dmethod" for dmethod in (:cholesky, :qr) gm5 = fit(GeneralizedLinearModel, @formula(admit ~ gre + gpa + rank), admit, - Binomial(), CloglogLink()) - @test !GLM.cancancel(gm5.model.rr) + Binomial(), CloglogLink(); method=dmethod) + @test !GLM.cancancel(gm5.rr) test_show(gm5) @test dof(gm5) == 6 @test isapprox(deviance(gm5), 458.89439629612616) @@ -372,18 +443,18 @@ end X = [ones(n) randn(rng, n)] y = logistic.(X*ones(2) + 1/10*randn(rng, n)) .> 1/2 - @test coeftable(glm(X, y, Binomial(), CloglogLink())).cols[4][2] < 0.05 + @test coeftable(glm(X, y, Binomial(), CloglogLink(); method=dmethod)).cols[4][2] < 0.05 end end ## Example with offsets from Venables & Ripley (2002, p.189) anorexia = CSV.read(joinpath(glm_datadir, "anorexia.csv"), DataFrame) -@testset "Normal offset" begin +@testset "Normal offset with $dmethod" for dmethod in (:cholesky, :qr) gm6 = fit(GeneralizedLinearModel, @formula(Postwt ~ 1 + Prewt + Treat), anorexia, - Normal(), IdentityLink(), offset=Array{Float64}(anorexia.Prewt)) - - @test GLM.cancancel(gm6.model.rr) + Normal(), IdentityLink(), method=dmethod, + offset=Array{Float64}(anorexia.Prewt)) + @test GLM.cancancel(gm6.rr) test_show(gm6) @test dof(gm6) == 5 @test isapprox(deviance(gm6), 3311.262619919613) @@ -400,11 +471,10 @@ anorexia = CSV.read(joinpath(glm_datadir, "anorexia.csv"), DataFrame) [13.3909581, 0.1611824, 1.8934926, 2.1333359]) end -@testset "Normal LogLink offset" begin +@testset "Normal LogLink offset with $dmethod" for dmethod in (:cholesky, :qr) gm7 = fit(GeneralizedLinearModel, @formula(Postwt ~ 1 + Prewt + Treat), anorexia, - Normal(), LogLink(), offset=anorexia.Prewt, rtol=1e-8) - - @test !GLM.cancancel(gm7.model.rr) + Normal(), LogLink(), method=dmethod, offset=anorexia.Prewt, rtol=1e-8) + @test !GLM.cancancel(gm7.rr) test_show(gm7) @test isapprox(deviance(gm7), 3265.207242977156) @test isapprox(nulldeviance(gm7), 507625.1718547432) @@ -418,9 +488,9 @@ end atol=1e-6) end -@testset "Poisson LogLink offset" begin +@testset "Poisson LogLink offset with $dmethod" for dmethod in (:cholesky, :qr) gm7p = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, - Poisson(), LogLink(), offset=log.(anorexia.Prewt), rtol=1e-8) + Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), rtol=1e-8) @test GLM.cancancel(gm7p.model.rr) test_show(gm7p) @@ -434,9 +504,9 @@ end [0.2091138392, 0.0025136984, 0.0297381842, 0.0324618795] end -@testset "Poisson LogLink offset with weights" begin +@testset "Poisson LogLink offset with weights with $dmethod" for dmethod in (:cholesky, :qr) gm7pw = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, - Poisson(), LogLink(), offset=log.(anorexia.Prewt), + Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), wts=repeat(1:4, outer=18), rtol=1e-8) @test 
GLM.cancancel(gm7pw.model.rr) @@ -455,10 +525,10 @@ end clotting = DataFrame(u = log.([5,10,15,20,30,40,60,80,100]), lot1 = [118,58,42,35,27,25,21,19,18]) -@testset "Gamma" begin - gm8 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma()) - @test !GLM.cancancel(gm8.model.rr) - @test isa(GLM.Link(gm8.model), InverseLink) +@testset "Gamma with $dmethod" for dmethod in (:cholesky, :qr) + gm8 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(); method=dmethod) + @test !GLM.cancancel(gm8.rr) + @test isa(GLM.Link(gm8), InverseLink) test_show(gm8) @test dof(gm8) == 3 @test isapprox(deviance(gm8), 0.016729715178484157) @@ -473,10 +543,11 @@ clotting = DataFrame(u = log.([5,10,15,20,30,40,60,80,100]), @test isapprox(stderror(gm8), [0.00092754223, 0.000414957683], atol=1e-6) end -@testset "InverseGaussian" begin - gm8a = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, InverseGaussian()) - @test !GLM.cancancel(gm8a.model.rr) - @test isa(GLM.Link(gm8a.model), InverseSquareLink) +@testset "InverseGaussian with $dmethod" for dmethod in (:cholesky, :qr) + gm8a = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, InverseGaussian(); + method=dmethod) + @test !GLM.cancancel(gm8a.rr) + @test isa(GLM.Link(gm8a), InverseSquareLink) test_show(gm8a) @test dof(gm8a) == 3 @test isapprox(deviance(gm8a), 0.006931128347234519) @@ -491,10 +562,10 @@ end @test isapprox(stderror(gm8a), [0.0001675339726910311,9.468485015919463e-5], atol=1e-6) end -@testset "Gamma LogLink" begin +@testset "Gamma LogLink with $dmethod" for dmethod in (:cholesky, :qr) gm9 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), - rtol=1e-8, atol=0.0) - @test !GLM.cancancel(gm9.model.rr) + method=dmethod, rtol=1e-8, atol=0.0) + @test !GLM.cancancel(gm9.rr) test_show(gm9) @test dof(gm9) == 3 @test deviance(gm9) ≈ 0.16260829451739 @@ -509,10 +580,10 @@ end @test stderror(gm9) ≈ [0.19030107482720, 0.05530784660144] end -@testset "Gamma IdentityLink" begin - gm10 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), - rtol=1e-8, atol=0.0) - @test !GLM.cancancel(gm10.model.rr) +@testset "Gamma IdentityLink with $dmethod" for dmethod in (:cholesky, :qr) + gm10 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(); + method=dmethod, rtol=1e-8, atol=0.0) + @test !GLM.cancancel(gm10.rr) test_show(gm10) @test dof(gm10) == 3 @test isapprox(deviance(gm10), 0.60845414895344) @@ -532,10 +603,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink" begin +@testset "Aggregated Binomial LogitLink with $dmethod" for dmethod in (:cholesky, :qr) for distr in (Binomial, Bernoulli) - gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(); + method=dmethod, wts=Array(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -556,9 +627,9 @@ admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1 admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error -@testset "Binomial LogitLink aggregated" begin +@testset "Binomial LogitLink aggregated with $dmethod" for dmethod 
in (:cholesky, :qr) gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - wts=admit_agr2.count) + method=dmethod, wts=admit_agr2.count) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -573,9 +644,9 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count end # Weighted Gamma example (weights are totally made up) -@testset "Gamma InverseLink Weights" begin +@testset "Gamma InverseLink Weights with $dmethod" for dmethod in (:cholesky, :qr) gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + method=dmethod, wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -590,9 +661,9 @@ end end # Weighted Poisson example (weights are totally made up) -@testset "Poisson LogLink Weights" begin - gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) +@testset "Poisson LogLink Weights with $dmethod" for dmethod in (:cholesky, :qr) + gm17 = glm(@formula(Counts ~ Outcome + Treatment), dobson, Poisson(), + method=dmethod, wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -603,14 +674,15 @@ end @test isapprox(aicc(gm17), 181.39578038136298) @test isapprox(bic(gm17), 186.5854647596431) @test isapprox(coef(gm17), [3.1218557035404793, -0.5270435906931427,-0.40300384148562746, - -0.017850203824417415,-0.03507851122782909]) + -0.017850203824417415,-0.03507851122782909]) end # "quine" dataset discussed in Section 7.4 of "Modern Applied Statistics with S" quine = dataset("MASS", "quine") -@testset "NegativeBinomial LogLink Fixed θ" begin - gm18 = fit(GeneralizedLinearModel, @formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink()) - @test !GLM.cancancel(gm18.model.rr) +@testset "NegativeBinomial LogLink Fixed θ with $dmethod" for dmethod in (:cholesky, :qr) + gm18 = fit(GeneralizedLinearModel, @formula(Days ~ Eth+Sex+Age+Lrn), quine, + NegativeBinomial(2.0), LogLink(), method=dmethod) + @test !GLM.cancancel(gm18.rr) test_show(gm18) @test dof(gm18) == 8 @test isapprox(deviance(gm18), 239.11105911824325, rtol = 1e-7) @@ -640,11 +712,11 @@ end @test isapprox(bic(gm19), 1146.96262250587) @test isapprox(coef(gm19)[1:7], [-0.12737182842213654, -0.055871700989224705, 0.01561618806384601, - -0.041113722732799125, 0.024042387142113462, 0.04400234618798099, 0.035765875508382027, -]) + -0.041113722732799125, 0.024042387142113462, 0.04400234618798099, + 0.035765875508382027]) end -@testset "NegativeBinomial LogLink, θ to be estimated" begin +@testset "NegativeBinomial LogLink, θ to be estimated with Cholesky" begin gm20 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink()) test_show(gm20) @test dof(gm20) == 8 @@ -679,9 +751,25 @@ end end end -@testset "NegativeBinomial NegativeBinomialLink, θ to be estimated" begin +@testset "NegativeBinomial LogLink, θ to be estimated with QR" begin + gm20 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); method=:qr) + test_show(gm20) + @test dof(gm20) == 8 + @test isapprox(deviance(gm20), 167.9518430624193, rtol = 1e-7) + @test isapprox(nulldeviance(gm20), 195.28668602703388, rtol = 1e-7) + @test isapprox(loglikelihood(gm20), -546.57550938017, rtol = 1e-7) + @test isapprox(nullloglikelihood(gm20), -560.2429308624774, rtol = 1e-7) + @test isapprox(aic(gm20), 1109.15101876034) + @test isapprox(aicc(gm20), 1110.202113650851) + 
@test isapprox(bic(gm20), 1133.0198717340068) + @test isapprox(coef(gm20)[1:7], + [2.894527697811509, -0.5693411448715979, 0.08238813087070128, -0.4484636623590206, + 0.08805060372902418, 0.3569553124412582, 0.2921383118842893]) +end + +@testset "NegativeBinomial NegativeBinomialLink, θ to be estimated with $dmethod" for dmethod in (:cholesky, :qr) # the default/canonical link is NegativeBinomialLink - gm21 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine) + gm21 = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine; method=dmethod) test_show(gm21) @test dof(gm21) == 8 @test isapprox(deviance(gm21), 168.0465485656672, rtol = 1e-7) @@ -696,9 +784,10 @@ end 0.01582155341041012, 0.029074956147127032, 0.023628812427424876]) end -@testset "Geometric LogLink" begin +@testset "Geometric LogLink with $dmethod" for dmethod in (:cholesky, :qr) # the default/canonical link is LogLink - gm22 = fit(GeneralizedLinearModel, @formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric()) + gm22 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric(); + method=dmethod) test_show(gm22) @test dof(gm22) == 8 @test deviance(gm22) ≈ 137.8781581814965 @@ -714,9 +803,11 @@ end 0.18553339017028742] end -@testset "Geometric is a special case of NegativeBinomial with θ = 1" begin - gm23 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric(), InverseLink()) - gm24 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(1), InverseLink()) +@testset "Geometric is a special case of NegativeBinomial with θ = 1 and $dmethod" for dmethod in (:cholesky, :qr) + gm23 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, Geometric(), + InverseLink(); method=dmethod) + gm24 = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(1), + InverseLink(); method=dmethod) @test coef(gm23) ≈ coef(gm24) @test stderror(gm23) ≈ stderror(gm24) @test confint(gm23) ≈ confint(gm24) @@ -729,7 +820,7 @@ end @test predict(gm23) ≈ predict(gm24) end -@testset "GLM with no intercept" begin +@testset "GLM with no intercept with Cholesky" begin # Gamma with single numeric predictor nointglm1 = fit(GeneralizedLinearModel, @formula(lot1 ~ 0 + u), clotting, Gamma()) @test !hasintercept(nointglm1.model) @@ -784,13 +875,68 @@ end [0.0015910084415445974, 0.13185097176418983, 0.13016395889443858, 0.1336778089431681] end +@testset "GLM with no intercept with $dmethod" for dmethod in (:cholesky, :qr) + # Gamma with single numeric predictor + nointglm1 = glm(@formula(lot1 ~ 0 + u), clotting, Gamma(); method=dmethod) + @test !hasintercept(nointglm1) + @test !GLM.cancancel(nointglm1.rr) + @test isa(GLM.Link(nointglm1), InverseLink) + test_show(nointglm1) + @test dof(nointglm1) == 2 + @test deviance(nointglm1) ≈ 0.6629903395245351 + @test isnan(nulldeviance(nointglm1)) + @test loglikelihood(nointglm1) ≈ -32.60688972888763 + @test_throws DomainError nullloglikelihood(nointglm1) + @test aic(nointglm1) ≈ 69.21377945777526 + @test aicc(nointglm1) ≈ 71.21377945777526 + @test bic(nointglm1) ≈ 69.6082286124477 + @test coef(nointglm1) ≈ [0.009200201253724151] + @test GLM.dispersion(nointglm1, true) ≈ 0.10198331431820506 + @test stderror(nointglm1) ≈ [0.000979309363228589] + + # Bernoulli with numeric predictors + nointglm2 = glm(@formula(admit ~ 0 + gre + gpa), admit, Bernoulli(); method=dmethod) + @test !hasintercept(nointglm2) + @test GLM.cancancel(nointglm2.rr) + test_show(nointglm2) + @test dof(nointglm2) == 2 + @test deviance(nointglm2) ≈ 503.5584368354113 + @test nulldeviance(nointglm2) ≈ 554.5177444479574 + @test 
loglikelihood(nointglm2) ≈ -251.77921841770578 + @test nullloglikelihood(nointglm2) ≈ -277.2588722239787 + @test aic(nointglm2) ≈ 507.55843683541156 + @test aicc(nointglm2) ≈ 507.58866353566344 + @test bic(nointglm2) ≈ 515.5413659296275 + @test coef(nointglm2) ≈ [0.0015622695743609228, -0.4822556276412118] + @test stderror(nointglm2) ≈ [0.000987218133602179, 0.17522675354523715] + + # Poisson with categorical predictors, weights and offset + nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, + Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), + wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + @test !hasintercept(nointglm3) + @test GLM.cancancel(nointglm3.rr) + test_show(nointglm3) + @test deviance(nointglm3) ≈ 90.17048668870225 + @test nulldeviance(nointglm3) ≈ 159.32999067102548 + @test loglikelihood(nointglm3) ≈ -610.3058020030296 + @test nullloglikelihood(nointglm3) ≈ -644.885553994191 + @test aic(nointglm3) ≈ 1228.6116040060592 + @test aicc(nointglm3) ≈ 1228.8401754346307 + @test bic(nointglm3) ≈ 1241.38343140962 + @test coef(nointglm3) ≈ + [-0.007008396492196935, 0.6038154674863438, 0.5654250124481003, 0.6931599989992452] + @test stderror(nointglm3) ≈ + [0.0015910084415445974, 0.13185097176418983, 0.13016395889443858, 0.1336778089431681] +end + @testset "Sparse GLM" begin rng = StableRNG(1) X = sprand(rng, 1000, 10, 0.01) β = randn(rng, 10) y = Bool[rand(rng) < logistic(x) for x in X * β] - gmsparse = fit(GeneralizedLinearModel, X, y, Binomial()) - gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial()) + gmsparse = fit(GeneralizedLinearModel, X, y, Binomial(); method=:cholesky) + gmdense = fit(GeneralizedLinearModel, Matrix(X), y, Binomial(); method=:cholesky) @test isapprox(deviance(gmsparse), deviance(gmdense)) @test isapprox(coef(gmsparse), coef(gmdense)) @@ -814,12 +960,13 @@ end end end -@testset "Predict" begin +@testset "Predict with $dmethod" for dmethod in (:cholesky, :qr) + # Binomial GLM rng = StableRNG(123) X = rand(rng, 10, 2) Y = logistic.(X * [3; -3]) - gm11 = fit(GeneralizedLinearModel, X, Y, Binomial()) + gm11 = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod) @test isapprox(predict(gm11), Y) @test predict(gm11) == fitted(gm11) @@ -851,7 +998,7 @@ end @test_throws ArgumentError predict(gm11, newX, offset=newoff) - gm12 = fit(GeneralizedLinearModel, X, Y, Binomial(), offset=off) + gm12 = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod, offset=off) @test_throws ArgumentError predict(gm12, newX) @test isapprox(predict(gm12, newX, offset=newoff), logistic.(newX * coef(gm12) .+ newoff)) @@ -860,15 +1007,62 @@ end d = DataFrame(X, :auto) d.y = Y - gm13 = fit(GeneralizedLinearModel, @formula(y ~ 0 + x1 + x2), d, Binomial()) - @test predict(gm13) ≈ predict(gm13, d[:,[:x1, :x2]]) - @test predict(gm13) ≈ predict(gm13, d) + gm13 = fit(GeneralizedLinearModel, @formula(y ~ 0 + x1 + x2), d, Binomial(); + method=dmethod) + @test predict(gm13) ≈ predict(gm13, d[:,[:x1, :x2]]) == predict(gm13, X) + @test predict(gm13) ≈ predict(gm13, d) == predict(gm13, X) + @test predict(gm13) ≈ predict(gm11) + @test predict(gm13, newX; interval=:confidence, interval_method=:delta) == + predict(gm11, newX; interval=:confidence, interval_method=:delta) + @test predict(gm13, newX; interval=:confidence, interval_method=:transformation) == + predict(gm11, newX; interval=:confidence, interval_method=:transformation) newd = DataFrame(newX, :auto) predict(gm13, newd) + + # Prediction from DataFrames 
+    drep = d[[1, 2, 3, 3, 4, 5, 6, 7, 8, 8, 9, 10], :]
+    dm = allowmissing(drep)
+    dm.x1[3] = missing
+    dm.y[9] = missing
+
+    gm13m = fit(GeneralizedLinearModel, @formula(y ~ 0 + x1 + x2), dm, Binomial();
+                method=dmethod)
+    @test predict(gm13m) == predict(gm13)
+    @test predict(gm13m, d) == predict(gm13, d)
+    @test isequal(predict(gm13m, dm), predict(gm13, dm))
+    gm13m_pred2 = predict(gm13m, dm; interval=:confidence, interval_method=:delta)
+    gm13m_pred3 = predict(gm13m, dm; interval=:confidence, interval_method=:transformation)
+    expected_pred = allowmissing(predict(gm13m, drep))
+    expected_pred[3] = missing
+    @test collect(skipmissing(predict(gm13m, dm))) ≈
+        collect(skipmissing(predict(gm13, dm))) ≈
+        collect(skipmissing(gm13m_pred2.prediction)) ≈
+        collect(skipmissing(gm13m_pred3.prediction)) ≈
+        collect(skipmissing(expected_pred))
+    @test ismissing.(predict(gm13m, dm)) ==
+        ismissing.(predict(gm13, dm)) ==
+        ismissing.(gm13m_pred2.prediction) ==
+        ismissing.(gm13m_pred3.prediction) ==
+        ismissing.(expected_pred)
+    expected_lower =
+        allowmissing(predict(gm13m, drep;
+                             interval=:confidence, interval_method=:delta).lower)
+    expected_lower[3] = missing
+    @test collect(skipmissing(gm13m_pred2.lower)) ≈ collect(skipmissing(expected_lower))
+    @test ismissing.(gm13m_pred2.lower) == ismissing.(expected_lower)
+    expected_upper =
+        allowmissing(predict(gm13m, drep;
+                             interval=:confidence, interval_method=:delta).upper)
+    expected_upper[3] = missing
+    @test collect(skipmissing(gm13m_pred2.upper)) ≈ collect(skipmissing(expected_upper))
+    @test ismissing.(gm13m_pred2.upper) == ismissing.(expected_upper)
+
+    # Linear Model
     Ylm = X * [0.8, 1.6] + 0.8randn(rng, 10)
-    mm = fit(LinearModel, X, Ylm)
+    mm = fit(LinearModel, X, Ylm; method=dmethod)
     pred1 = predict(mm, newX)
     pred2 = predict(mm, newX, interval=:confidence)
     se_pred = sqrt.(diag(newX*vcov(mm)*newX'))
@@ -894,13 +1088,95 @@ end
     @test pred3.upper ≈ pred3.prediction +
        quantile(TDist(dof_residual(mm)), 0.975)*sqrt.(diag(newX*vcov(mm)*newX') .+ deviance(mm)/dof_residual(mm)) ≈
        [3.9288331595737196, 4.077092463922373, 4.762903743958081, 3.82184595169028, 4.034521019386702]
 
+    @test predict!(similar(Y, size(newX, 1)), mm, newX) == predict(mm, newX)
+    @test predict!((prediction=similar(Y, size(newX, 1)),
+                    lower=similar(Y, size(newX, 1)),
+                    upper=similar(Y, size(newX, 1))),
+                   mm, newX, interval=:confidence) ==
+        predict(mm, newX, interval=:confidence)
+    @test predict!((prediction=similar(Y, size(newX, 1)),
+                    lower=similar(Y, size(newX, 1)),
+                    upper=similar(Y, size(newX, 1))),
+                   mm, newX, interval=:prediction) ==
+        predict(mm, newX, interval=:prediction)
+    @test_throws ArgumentError predict!((prediction=similar(Y, size(newX, 1)),
+                                         lower=similar(Y, size(newX, 1)),
+                                         upper=similar(Y, size(newX, 1))), mm, newX)
+    @test_throws ArgumentError predict!(similar(Y, size(newX, 1)), mm, newX,
+                                        interval=:confidence)
+    @test_throws ArgumentError predict!(similar(Y, size(newX, 1)), mm, newX,
+                                        interval=:prediction)
+    @test_throws DimensionMismatch predict!([1], mm, newX)
+    @test_throws DimensionMismatch predict!((prediction=similar(Y, size(newX, 1)),
+                                             lower=similar(Y, size(newX, 1)),
+                                             upper=[1]),
+                                            mm, newX, interval=:confidence)
+
+    # Prediction from DataFrames
+    d = DataFrame(X, :auto)
+    d.y = Ylm
+
+    mmd = lm(@formula(y ~ 0 + x1 + x2), d; method=dmethod)
+    @test predict(mmd) ≈ predict(mmd, d[:,[:x1, :x2]]) == predict(mm, X)
+    @test predict(mmd) ≈ predict(mmd, d) == predict(mm, X)
+    @test predict(mmd) ≈ predict(mm)
+    @test predict(mmd, newX; interval=:confidence) ==
+        predict(mm, newX; interval=:confidence)
+    @test predict(mmd, newX; interval=:prediction) ==
+        predict(mm, newX; interval=:prediction)
+
+    newd = DataFrame(newX, :auto)
+    @test predict(mmd, newd) == predict(mm, newX)
+    @test predict(mmd, newd; interval=:confidence) ==
+        predict(mm, newX; interval=:confidence)
+    @test predict(mmd, newd; interval=:prediction) ==
+        predict(mm, newX; interval=:prediction)
+
+    # Prediction from DataFrames with missing values
+    drep = d[[1, 2, 3, 3, 4, 5, 6, 7, 8, 8, 9, 10], :]
+    dm = allowmissing(drep)
+    dm.x1[3] = missing
+    dm.y[9] = missing
+
+    mmdm = lm(@formula(y ~ 0 + x1 + x2), dm; method=dmethod)
+    @test predict(mmdm) == predict(mmd)
+    @test predict(mmdm, d) == predict(mmd, d)
+    @test isequal(predict(mmdm, dm), predict(mmd, dm))
+    mmdm_pred2 = predict(mmdm, dm; interval=:confidence)
+    mmdm_pred3 = predict(mmdm, dm; interval=:prediction)
+    expected_pred = allowmissing(predict(mmdm, drep))
+    expected_pred[3] = missing
+    @test collect(skipmissing(predict(mmdm, dm))) ≈
+        collect(skipmissing(predict(mmd, dm))) ≈
+        collect(skipmissing(mmdm_pred2.prediction)) ≈
+        collect(skipmissing(mmdm_pred3.prediction)) ≈
+        collect(skipmissing(expected_pred))
+    @test ismissing.(predict(mmdm, dm)) ==
+        ismissing.(predict(mmd, dm)) ==
+        ismissing.(mmdm_pred2.prediction) ==
+        ismissing.(mmdm_pred3.prediction) ==
+        ismissing.(expected_pred)
+    expected_lower =
+        allowmissing(predict(mmdm, drep; interval=:confidence).lower)
+    expected_lower[3] = missing
+    @test collect(skipmissing(mmdm_pred2.lower)) ≈ collect(skipmissing(expected_lower))
+    @test ismissing.(mmdm_pred2.lower) == ismissing.(expected_lower)
+    expected_upper =
+        allowmissing(predict(mmdm, drep; interval=:confidence).upper)
+    expected_upper[3] = missing
+    @test collect(skipmissing(mmdm_pred2.upper)) ≈ collect(skipmissing(expected_upper))
+    @test ismissing.(mmdm_pred2.upper) == ismissing.(expected_upper)
+
     # Prediction with dropcollinear (#409)
     x = [1.0 1.0
          1.0 2.0
          1.0 -1.0]
     y = [1.0, 3.0, -2.0]
-    m1 = lm(x, y, dropcollinear=true)
-    m2 = lm(x, y, dropcollinear=false)
+    m1 = lm(x, y, dropcollinear=true, method=dmethod)
+    m2 = lm(x, y, dropcollinear=false, method=dmethod)
 
     p1 = predict(m1, x, interval=:confidence)
     p2 = predict(m2, x, interval=:confidence)
@@ -915,8 +1191,8 @@ end
          1.0 -1000.0 4.6
          1.0 5000 2.4]
     y = [1.0, 3.0, -2.0, 4.5]
-    m1 = lm(x, y, dropcollinear=true)
-    m2 = lm(x, y, dropcollinear=false)
+    m1 = lm(x, y, dropcollinear=true, method=dmethod)
+    m2 = lm(x, y, dropcollinear=false, method=dmethod)
 
     p1 = predict(m1, x, interval=:confidence)
     p2 = predict(m2, x, interval=:confidence)
@@ -934,8 +1210,8 @@ end
          1.0 2.0 3.0
          1.0 -1.0 0.0]
     y = [1.0, 3.0, -2.0]
-    m1 = lm(x, y)
-    m2 = lm(x[:, 1:2], y)
+    m1 = lm(x, y, method=dmethod)
+    m2 = lm(x[:, 1:2], y, method=dmethod)
 
     @test predict(m1) ≈ predict(m2)
     @test_broken predict(m1, interval=:confidence) ≈
@@ -946,10 +1222,10 @@ end
     @test_throws ArgumentError predict(m1, x, interval=:prediction)
 end
 
-@testset "GLM confidence intervals" begin
+@testset "GLM confidence intervals with $dmethod" for dmethod in (:cholesky, :qr)
     X = [fill(1,50) range(0,1, length=50)]
     Y = vec([0 0 0 1 0 1 1 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1])
-    gm = fit(GeneralizedLinearModel, X, Y, Binomial())
+    gm = fit(GeneralizedLinearModel, X, Y, Binomial(); method=dmethod)
 
     newX = [fill(1,5) [0.0000000, 0.2405063, 0.4936709, 0.7468354, 1.0000000]]
 
@@ -972,7 +1248,7 @@ end
 
     @test_throws ArgumentError predict(gm, newX, interval=:undefined)
 end
 
-@testset "F test comparing to null model" begin
+@testset "F test with $dmethod comparing to null model" for dmethod in (:cholesky, :qr)
     d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
                   Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
                   Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))
@@ -1228,7 +1504,7 @@ end
 
 @testset "Issue 153" begin
     X = [ones(10) randn(10)]
-    Test.@inferred cholesky(GLM.DensePredQR{Float64}(X))
+    Test.@inferred cholesky(GLM.DensePredQR(X))
 end
 
 @testset "Issue 224" begin
@@ -1300,54 +1576,54 @@ end
     @test hasintercept(secondcolinterceptmod)
 end
 
-@testset "Views" begin
+@testset "Views with $dmethod method" for dmethod in (:cholesky, :qr)
     @testset "#444" begin
         X = randn(10, 2)
         y = X*ones(2) + randn(10)
-        @test coef(glm(X, y, Normal(), IdentityLink())) ==
-            coef(glm(view(X, 1:10, :), view(y, 1:10), Normal(), IdentityLink()))
+        @test coef(glm(X, y, Normal(), IdentityLink(), method=dmethod)) ==
+            coef(glm(view(X, 1:10, :), view(y, 1:10), Normal(), IdentityLink(); method=dmethod))
 
         x, y, w = rand(100, 2), rand(100), rand(100)
-        lm1 = lm(x, y)
-        lm2 = lm(x, view(y, :))
-        lm3 = lm(view(x, :, :), y)
-        lm4 = lm(view(x, :, :), view(y, :))
+        lm1 = lm(x, y; method=dmethod)
+        lm2 = lm(x, view(y, :); method=dmethod)
+        lm3 = lm(view(x, :, :), y; method=dmethod)
+        lm4 = lm(view(x, :, :), view(y, :); method=dmethod)
         @test coef(lm1) == coef(lm2) == coef(lm3) == coef(lm4)
 
-        lm5 = lm(x, y, wts=w)
-        lm6 = lm(x, view(y, :), wts=w)
-        lm7 = lm(view(x, :, :), y, wts=w)
-        lm8 = lm(view(x, :, :), view(y, :), wts=w)
-        lm9 = lm(x, y, wts=view(w, :))
-        lm10 = lm(x, view(y, :), wts=view(w, :))
-        lm11 = lm(view(x, :, :), y, wts=view(w, :))
-        lm12 = lm(view(x, :, :), view(y, :), wts=view(w, :))
+        lm5 = lm(x, y, wts=w, method=dmethod)
+        lm6 = lm(x, view(y, :), method=dmethod, wts=w)
+        lm7 = lm(view(x, :, :), y, method=dmethod, wts=w)
+        lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=w)
+        lm9 = lm(x, y, method=dmethod, wts=view(w, :))
+        lm10 = lm(x, view(y, :), method=dmethod, wts=view(w, :))
+        lm11 = lm(view(x, :, :), y, method=dmethod, wts=view(w, :))
+        lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=view(w, :))
        @test coef(lm5) == coef(lm6) == coef(lm7) == coef(lm8) ==
            coef(lm9) == coef(lm10) == coef(lm11) == coef(lm12)
 
        x, y, w = rand(100, 2), rand(Bool, 100), rand(100)
-        glm1 = glm(x, y, Binomial())
-        glm2 = glm(x, view(y, :), Binomial())
-        glm3 = glm(view(x, :, :), y, Binomial())
-        glm4 = glm(view(x, :, :), view(y, :), Binomial())
+        glm1 = glm(x, y, Binomial(), method=dmethod)
+        glm2 = glm(x, view(y, :), Binomial(), method=dmethod)
+        glm3 = glm(view(x, :, :), y, Binomial(), method=dmethod)
+        glm4 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod)
        @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4)
 
-        glm5 = glm(x, y, Binomial(), wts=w)
-        glm6 = glm(x, view(y, :), Binomial(), wts=w)
-        glm7 = glm(view(x, :, :), y, Binomial(), wts=w)
-        glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w)
-        glm9 = glm(x, y, Binomial(), wts=view(w, :))
-        glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :))
-        glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :))
-        glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :))
+        glm5 = glm(x, y, Binomial(), method=dmethod, wts=w)
+        glm6 = glm(x, view(y, :), Binomial(), method=dmethod, wts=w)
+        glm7 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=w)
+        glm8 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=w)
+        glm9 = glm(x, y, Binomial(), method=dmethod, wts=view(w, :))
+        glm10 = glm(x, view(y, :), Binomial(), method=dmethod, wts=view(w, :))
+        glm11 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=view(w, :))
+        glm12 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=view(w, :))
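+        # All copy/view combinations of the design matrix, response and weights
+        # should produce identical coefficient estimates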
        @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) ==
            coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12)
     end
 
     @testset "Views: #213, #470" begin
         xs = randn(46, 3)
         ys = randn(46)
-        glm_dense = lm(xs, ys)
-        glm_views = lm(@view(xs[1:end, 1:end]), ys)
+        glm_dense = lm(xs, ys; method=dmethod)
+        glm_views = lm(@view(xs[1:end, 1:end]), ys; method=dmethod)
         @test coef(glm_dense) == coef(glm_views)
         rows = 1:2:size(xs,1)
         cols = 1:2:size(xs,2)
@@ -1355,14 +1631,14 @@ end
         xs_altview = @view xs[rows, cols]
         ys_altcopy = ys[rows]
         ys_altview = @view ys[rows]
-        glm_dense_alt = lm(xs_altcopy, ys_altcopy)
-        glm_views_alt = lm(xs_altview, ys_altview)
+        glm_dense_alt = lm(xs_altcopy, ys_altcopy; method=dmethod)
+        glm_views_alt = lm(xs_altview, ys_altview; method=dmethod)
         # exact equality fails in the final decimal digit for Julia 1.9
         @test coef(glm_dense_alt) ≈ coef(glm_views_alt)
     end
 end
 
-@testset "PowerLink" begin
+@testset "PowerLink with $dmethod" for dmethod in (:cholesky, :qr)
     @testset "Functions related to PowerLink" begin
         @test GLM.linkfun(IdentityLink(), 10) ≈ GLM.linkfun(PowerLink(1), 10)
         @test GLM.linkfun(SqrtLink(), 10) ≈ GLM.linkfun(PowerLink(0.5), 10)
@@ -1392,7 +1668,8 @@ end
     end
     trees = dataset("datasets", "trees")
     @testset "GLM with PowerLink" begin
-        mdl = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1 / 3); rtol=1.0e-12, atol=1.0e-12)
+        mdl = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1 / 3);
+                  method=dmethod, rtol=1.0e-12, atol=1.0e-12)
        @test coef(mdl) ≈ [-0.05132238692134761, 0.01428684676273272, 0.15033126098228242]
        @test stderror(mdl) ≈ [0.224095414423756, 0.003342439119757, 0.005838227761632] atol=1.0e-8
        @test dof(mdl) == 4
@@ -1403,8 +1680,10 @@ end
         @test predict(mdl)[1] ≈ 10.59735275421753
     end
     @testset "Compare PowerLink(0) and LogLink" begin
-        mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0))
-        mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), LogLink())
+        mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0);
+                   method=dmethod)
+        mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), LogLink();
+                   method=dmethod)
        @test coef(mdl1) ≈ coef(mdl2)
        @test stderror(mdl1) ≈ stderror(mdl2)
        @test dof(mdl1) == dof(mdl2)
@@ -1417,8 +1696,10 @@ end
        @test predict(mdl1) ≈ predict(mdl2)
     end
     @testset "Compare PowerLink(0.5) and SqrtLink" begin
-        mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0.5))
-        mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), SqrtLink())
+        mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(0.5);
+                   method=dmethod)
+        mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), SqrtLink();
+                   method=dmethod)
        @test coef(mdl1) ≈ coef(mdl2)
        @test stderror(mdl1) ≈ stderror(mdl2)
        @test dof(mdl1) == dof(mdl2)
@@ -1431,8 +1712,10 @@ end
        @test predict(mdl1) ≈ predict(mdl2)
     end
     @testset "Compare PowerLink(1) and IdentityLink" begin
-        mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1))
-        mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), IdentityLink())
+        mdl1 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(1);
+                   method=dmethod)
+        mdl2 = glm(@formula(Volume ~ Height + Girth), trees, Normal(), IdentityLink();
+                   method=dmethod)
        @test coef(mdl1) ≈ coef(mdl2)
        @test stderror(mdl1) ≈ stderror(mdl2)
        @test dof(mdl1) == dof(mdl2)
@@ -1446,7 +1729,88 @@ end
     end
 end
 
-@testset "dropcollinear with GLMs" begin
+@testset "contrasts argument" begin
+    # DummyCoding (default)
+    m = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)),
+           contrasts=Dict(:x=>DummyCoding()))
+    mcat = lm(@formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5))))
+    gm = glm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+             contrasts=Dict(:x=>DummyCoding()))
+    gmcat = glm(@formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5))),
+                Normal(), IdentityLink())
+    @test coef(m) == coef(mcat) ≈ [11, 1, 2, 3, 4]
+    @test coef(gm) == coef(gmcat) ≈ [11, 1, 2, 3, 4]
+
+    m = fit(LinearModel, @formula(y~x), (y=1:25, x=repeat(1:5, 5)),
+            contrasts=Dict(:x=>DummyCoding()))
+    mcat = fit(LinearModel, @formula(y~x), (y=1:25, x=categorical(repeat(1:5, 5))))
+    gm = fit(GeneralizedLinearModel, @formula(y~x),
+             (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+             contrasts=Dict(:x=>DummyCoding()))
+    gmcat = fit(GeneralizedLinearModel, @formula(y~x),
+                (y=1:25, x=categorical(repeat(1:5, 5))),
+                Normal(), IdentityLink())
+    @test coef(m) == coef(mcat) ≈ [11, 1, 2, 3, 4]
+    @test coef(gm) == coef(gmcat) ≈ [11, 1, 2, 3, 4]
+
+    # EffectsCoding
+    m = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)),
+           contrasts=Dict(:x=>EffectsCoding()))
+    gm = glm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+             contrasts=Dict(:x=>EffectsCoding()))
+    @test coef(m) ≈ coef(gm) ≈ [13, -1, 0, 1, 2]
+
+    m = fit(LinearModel, @formula(y~x), (y=1:25, x=repeat(1:5, 5)),
+            contrasts=Dict(:x=>EffectsCoding()))
+    gm = fit(GeneralizedLinearModel, @formula(y~x),
+             (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+             contrasts=Dict(:x=>EffectsCoding()))
+    @test coef(m) ≈ coef(gm) ≈ [13, -1, 0, 1, 2]
+end
+
+@testset "dofit argument" begin
+    gm1 = glm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+              dofit=true)
+    gm2 = glm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+              dofit=false)
+    @test gm1.fit
+    @test !gm2.fit
+    fit!(gm2)
+    @test gm2.fit
+    @test coef(gm1) == coef(gm2) ≈ [10, 1]
+
+    # Deprecated
+    gm1 = fit(GeneralizedLinearModel, @formula(y~x),
+              (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+              dofit=true)
+    gm2 = fit(GeneralizedLinearModel, @formula(y~x),
+              (y=1:25, x=repeat(1:5, 5)), Normal(), IdentityLink(),
+              dofit=false)
+    @test gm1.fit
+    @test !gm2.fit
+    fit!(gm2)
+    @test gm2.fit
+    @test coef(gm1) == coef(gm2) ≈ [10, 1]
+end
+
+@testset "formula accessor" begin
+    m = lm(rand(10, 2), rand(10))
+    @test_throws ArgumentError formula(m)
+
+    m = glm(rand(10, 2), rand(10), Normal(), IdentityLink())
+    @test_throws ArgumentError formula(m)
+
+    df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])
+    m = lm(@formula(y ~ x), df)
+    @test formula(m)::FormulaTerm === m.formula
+
+    df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])
+    m = glm(@formula(y ~ x), df, Normal(), IdentityLink())
+    @test formula(m)::FormulaTerm === m.formula
+end
+
+@testset "dropcollinear in GLM with Cholesky" begin
     data = DataFrame(x1=[4, 5, 9, 6, 5], x2=[5, 3, 6, 7, 1],
                      x3=[4.2, 4.6, 8.4, 6.2, 4.2], y=[14, 14, 24, 20, 11])
@@ -1602,3 +1966,8 @@ end
     # 3. 44 / wt == y
     @test GLM.loglik_obs(Binomial(), y, μ, wt, ϕ) ≈ GLM.logpdf(Binomial(Int(wt), μ), 44)
 end
+
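+# Only :cholesky and :qr are valid values for the `method` keyword argument,
+# so any other symbol should raise an ArgumentError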
+@testset "GLM with wrong option value in method argument" begin
+    @test_throws ArgumentError lm(@formula(OptDen ~ Carb), form; method=:pr)
+    @test_throws ArgumentError glm(@formula(OptDen ~ Carb), form, Normal(); method=:pr)
+end