diff --git a/src/GLM.jl b/src/GLM.jl
index 019f80e3..8625a48b 100644
--- a/src/GLM.jl
+++ b/src/GLM.jl
@@ -12,14 +12,14 @@ module GLM
 import Statistics: cor
 import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
                   loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
-                  fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
+                  fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights
 import StatsFuns: xlogy
 import SpecialFunctions: erfc, erfcinv, digamma, trigamma
 import StatsModels: hasintercept
 export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
        loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted,
        fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
-       cooksdistance, hasintercept
+       cooksdistance, hasintercept, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights

 export
     # types
@@ -52,17 +52,17 @@ module GLM
     LinearModel,

     # functions
-    canonicallink,  # canonical link function for a distribution
-    deviance,       # deviance of fitted and observed responses
-    devresid,       # vector of squared deviance residuals
-    formula,        # extract the formula from a model
-    glm,            # general interface
-    linpred,        # linear predictor
-    lm,             # linear model
-    negbin,         # interface to fitting negative binomial regression
-    nobs,           # total number of observations
-    predict,        # make predictions
-    ftest           # compare models with an F test
+    canonicallink, # canonical link function for a distribution
+    deviance,      # deviance of fitted and observed responses
+    devresid,      # vector of squared deviance residuals
+    formula,       # extract the formula from a model
+    glm,           # general interface
+    linpred,       # linear predictor
+    lm,            # linear model
+    negbin,        # interface to fitting negative binomial regression
+    nobs,          # total number of observations
+    predict,       # make predictions
+    ftest          # compare models with an F test

 const FP = AbstractFloat
 const FPVector{T<:FP} = AbstractArray{T,1}
diff --git a/src/glmfit.jl b/src/glmfit.jl
index 0c108296..94058ef8 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -3,7 +3,7 @@

 The response vector and various derived vectors in a generalized linear model.
 """
-struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
+struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link,W<:AbstractWeights{<:Real}} <: ModResp
    "`y`: response vector"
    y::V
    d::D
@@ -17,15 +17,15 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
    mu::V
    "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0"
    offset::V
-   "`wts:` prior case weights. Can be of length 0."
-   wts::V
+   "`wts`: prior case weights. Must have length `n`; unweighted models use `UnitWeights`."
+   wts::W
    "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm"
    wrkwt::V
    "`wrkresid`: working residuals for IRLS"
    wrkresid::V
 end

-function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L}
+function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector, D, L, W}
    n = length(y)
    nη = length(η)
    nμ = length(μ)
@@ -41,36 +41,34 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec
    end

-   # Lengths of wts and off can be either n or 0
-   if lw != 0 && lw != n
-       throw(DimensionMismatch("wts must have length $n or length 0 but was $lw"))
+   # Length of wts must be n; length of off can be either n or 0
+   if lw != n
+       throw(DimensionMismatch("wts must have length $n but was $lw"))
    end
    if lo != 0 && lo != n
        throw(DimensionMismatch("offset must have length $n or length 0 but was $lo"))
    end

-   return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
+   return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
 end

-function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector)
+function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights{<:Real})
    # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
    _y = convert(Vector{float(eltype(y))}, y)
    _off = convert(Vector{float(eltype(off))}, off)
-   _wts = convert(Vector{float(eltype(wts))}, wts)
    η = similar(_y)
    μ = similar(_y)
-   r = GlmResp(_y, d, l, η, μ, _off, _wts)
-   initialeta!(r.eta, d, l, _y, _wts, _off)
+   r = GlmResp(_y, d, l, η, μ, _off, wts)
+   initialeta!(r.eta, d, l, _y, wts, _off)
    updateμ!(r, r.eta)
    return r
 end

-function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
-                 wts::AbstractVector{<:Real}) where {D, L}
-    GlmResp(float(y), d, l, float(off), float(wts))
+function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
+                 wts::AbstractWeights{<:Real}) where {D, L}
+    GlmResp(float(y), d, l, float(off), wts)
 end

 deviance(r::GlmResp) = sum(r.devresid)

+weights(r::GlmResp) = r.wts
+
 """
    cancancel(r::GlmResp{V,D,L})
@@ -288,7 +286,7 @@ function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real
    lp = r.mu

    # Initialize β, μ, and compute deviance
-   if start == nothing || isempty(start)
+   if start === nothing || isempty(start)
        # Compute beta update based on default response value
        # if no starting values have been passed
        delbeta!(p, wrkresp(r), r.wrkwt)
@@ -440,12 +438,9 @@ const FIT_GLM_DOC = """
    # Keyword Arguments
    - `dofit::Bool=true`: Determines whether model will be fit
-   - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
-     Such weights are equivalent to repeating each observation a number of times equal
-     to its weight. Do note that this interpretation gives equal point estimates but
-     different standard errors from analytical (a.k.a. inverse variance) weights and
-     from probability (a.k.a. sampling) weights which are the default in some other
-     software.
+   - `wts::AbstractWeights=uweights(length(y))`: Weights of observations.
+     Allowed weights are `AnalyticWeights`, `FrequencyWeights`, or `ProbabilityWeights`.
+     If a plain vector is passed (deprecated), it is coerced to `FrequencyWeights`.
+     The default `UnitWeights` correspond to an unweighted model.
    - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`.
      Can be of length 0
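To make the reworked `wts` keyword concrete for reviewers, here is an illustrative sketch of the intended call patterns (hypothetical data; it assumes the StatsBase constructors `aweights`, `fweights`, and `pweights`, whose result types this PR re-exports):

```julia
using GLM, StatsBase

X = randn(100, 2)
y = X * [1.0, -0.5] .+ randn(100)
w = rand(100) .+ 0.5

# Explicit weight types (the new API):
glm(X, y, Normal(), IdentityLink(); wts = aweights(w))  # analytic weights
glm(X, y, Normal(), IdentityLink(); wts = fweights(w))  # frequency weights
glm(X, y, Normal(), IdentityLink(); wts = pweights(w))  # probability weights

# A bare vector still works, but goes through the deprecation path
# and is coerced to FrequencyWeights:
glm(X, y, Normal(), IdentityLink(); wts = w)
```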
@@ -476,17 +471,26 @@ function fit(::Type{M},
    d::UnivariateDistribution,
    l::Link = canonicallink(d);
    dofit::Bool = true,
-   wts::AbstractVector{<:Real} = similar(y, 0),
+   wts::AbstractVector{<:Real} = uweights(length(y)),
    offset::AbstractVector{<:Real} = similar(y, 0),
    fitargs...) where {M<:AbstractGLM}
-
    # Check that X and y have the same number of observations
    if size(X, 1) != size(y, 1)
        throw(DimensionMismatch("number of rows in X and y must match"))
    end
-
-   rr = GlmResp(y, d, l, offset, wts)
-   res = M(rr, cholpred(X), false)
+   # For backward compatibility accept wts as AbstractVector and coerce it to FrequencyWeights
+   _wts = if isa(wts, AbstractWeights)
+       wts
+   elseif isa(wts, AbstractVector)
+       Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " *
+                    "AnalyticWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
+                    "by coercing wts to `FrequencyWeights`", :fit)
+       fweights(wts)
+   else
+       throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
+   end
+   rr = GlmResp(y, d, l, offset, _wts)
+   res = M(rr, cholpred(X, false, _wts), false)
    return dofit ? fit!(res; fitargs...) : res
 end
@@ -649,3 +654,18 @@ function checky(y, d::Binomial)
    end
    return nothing
 end
+
+"""
+    nobs(obj::LinearModel)
+    nobs(obj::GLM)
+
+For linear and generalized linear models, returns the number of rows or,
+when prior weights of type `FrequencyWeights` are specified, the sum of the weights.
+"""
+nobs(obj::LinPredModel) = nobs(obj.rr)
+
+nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
+nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} =
+    isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
+
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} =
+    isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
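The new `nobs` methods change what "number of observations" means depending on the weight type. An illustrative sketch (hypothetical data; the return values follow the definitions above and are exercised by the tests further down, e.g. `nobs(gm14) == 400` for `fweights` vs `8` for `aweights`):

```julia
using GLM, StatsBase

X = randn(30, 2)
y = randn(30)
w = fill(2.0, 30)

nobs(lm(X, y))                     # 30   — unweighted (UnitWeights)
nobs(lm(X, y; wts = fweights(w)))  # 60.0 — FrequencyWeights: sum of the weights
nobs(lm(X, y; wts = aweights(w)))  # 30.0 — other weight types: number of rows
```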
diff --git a/src/linpred.jl b/src/linpred.jl
index 4274f575..441f1211 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -45,28 +45,39 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `qr`: a `QRCompactWY` object created from `X`, with optional row weights.
+- `wts`: observation weights (`AbstractWeights`)
+- `wresponse`: scratch vector of length `n` for the weighted response
 """
-mutable struct DensePredQR{T<:BlasReal} <: DensePred
+mutable struct DensePredQR{T<:BlasReal,W<:AbstractWeights{<:Real}} <: DensePred
    X::Matrix{T}                  # model matrix
+   Xw::Matrix{T}                 # weighted model matrix
    beta0::Vector{T}              # base coefficient vector
    delbeta::Vector{T}            # coefficient increment
    scratchbeta::Vector{T}
    qr::QRCompactWY{T}
-   function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T
+   wts::W
+   wresponse::Vector{T}
+   function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}}
        n, p = size(X)
        length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)"))
-       new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X))
+       (length(wts) == n || isempty(wts)) ||
+           throw(DimensionMismatch("length of weights does not match the number of rows of X"))
+       Xw = wts isa UnitWeights ? Matrix{T}(undef, 0, 0) : sqrt.(wts) .* X
+       qrX = wts isa UnitWeights ? qr(X) : qr(Xw)
+       new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),)))
    end
-   function DensePredQR{T}(X::Matrix{T}) where T
+   function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W}
        n, p = size(X)
-       new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X))
+       DensePredQR(X, zeros(T, p), wts)
+   end
+   function DensePredQR(X::Matrix{T}) where T
+       n, p = size(X)
+       DensePredQR{T}(X, zeros(T, p), uweights(0))
    end
 end
-DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0)
-DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2)))
-convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2)))
+DensePredQR{T}(X::Matrix) where T = DensePredQR{T}(convert(Matrix{T}, X), zeros(T, size(X, 2)), uweights(size(X,1)))
+DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts)
+DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts)
+convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X)

 """
    delbeta!(p::LinPred, r::Vector)

 Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r`
 """
@@ -78,7 +89,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal
 end

 """
    DensePredChol{T}

 A `LinPred` type with a dense Cholesky factorization of `X'X`

@@ -90,31 +101,40 @@ A `LinPred` type with a dense Cholesky factorization of `X'X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights.
 - `scratchm1`: scratch Matrix{T} of the same size as `X`
-- `scratchm2`: scratch Matrix{T} os the same size as `X'X`
+- `scratchm2`: scratch Matrix{T} of the same size as `X'X`
+- `scratchv1`: scratch Vector{T} of the same size as `y`
 """
-mutable struct DensePredChol{T<:BlasReal,C} <: DensePred
+mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractWeights{<:Real}} <: DensePred
    X::Matrix{T}                   # model matrix
+   Xw::Matrix{T}                  # weighted model matrix
    beta0::Vector{T}               # base vector for coefficients
    delbeta::Vector{T}             # coefficient increment
    scratchbeta::Vector{T}
    chol::C
+   wts::W
    scratchm1::Matrix{T}
    scratchm2::Matrix{T}
+   scratchv1::Vector{T}
 end
-function DensePredChol(X::AbstractMatrix, pivot::Bool)
-   F = Hermitian(float(X'X))
+function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real})
+   Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts) .* X
+   F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw))
    T = eltype(F)
    F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F)
    DensePredChol(Matrix{T}(X),
+                 Matrix{T}(Xw),
                  zeros(T, size(X, 2)),
                  zeros(T, size(X, 2)),
                  zeros(T, size(X, 2)),
                  F,
+                 wts,
                  similar(X, T),
-                 similar(cholfactors(F)))
+                 similar(cholfactors(F)),
+                 similar(X, T, (size(X,1),)))
 end

-cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot)
+cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts)
+cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(size(X,1)))

 cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors
 cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol

@@ -126,12 +146,16 @@ function cholesky(p::DensePredChol{T}) where T<:FP
 end
 cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0)

-function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:UnitWeights}, r::Vector{T}) where T<:BlasReal
    ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r))
    p
 end
+
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal
+   p.scratchv1 .= r .* sqrt.(p.wts)
+   ldiv!(p.chol, mul!(p.delbeta, transpose(p.Xw), p.scratchv1))
+   p
+end

-function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal
    ch = p.chol
    delbeta = mul!(p.delbeta, adjoint(p.X), r)
    rnk = rank(ch)
@@ -148,59 +172,102 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:
    p
 end

+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal
+   ch = p.chol
+   p.scratchv1 .= r .* sqrt.(p.wts)
+   delbeta = mul!(p.delbeta, adjoint(p.Xw), p.scratchv1)
+   rnk = rank(ch)
+   if rnk == length(delbeta)
+       ldiv!(ch, delbeta)
+   else
+       permute!(delbeta, ch.p)
+       for k in (rnk+1):length(delbeta)
+           delbeta[k] = -zero(T)
+       end
+       LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk))
+       invpermute!(delbeta, ch.p)
+   end
+   p
+end
+
-function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
-   scr = mul!(p.scratchm1, Diagonal(wt), p.X)
-   cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U))
-   mul!(p.delbeta, transpose(scr), r)
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+   p.scratchm1 .= wt .* p.X
+   cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(p.scratchm1), p.X), :U))
+   mul!(p.delbeta, transpose(p.scratchm1), r)
    ldiv!(p.chol, p.delbeta)
    p
 end

-function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
    cf = cholfactors(p.chol)
    piv = p.chol.p
-   cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv]
+   p.scratchm1 .= wt .* p.X
+   cf .= mul!(p.scratchm2, adjoint(p.scratchm1), p.X)[piv, piv]
    cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
    ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
    p
 end
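The weighted `delbeta!` methods rely on the usual square-root-of-weights trick: with `Xw = sqrt.(w) .* X` stored in the predictor, the weighted normal equations reduce to an ordinary Cholesky solve. A self-contained check of that identity (plain `LinearAlgebra`, no PR code):

```julia
using LinearAlgebra

X = randn(50, 3)
r = randn(50)
w = rand(50) .+ 0.1

Xw = sqrt.(w) .* X    # weighted model matrix, as stored in p.Xw
rw = sqrt.(w) .* r    # scaled residual, as stored in p.scratchv1

δ1 = cholesky(Hermitian(Xw'Xw)) \ (Xw'rw)       # what the weighted delbeta! computes
δ2 = (X' * Diagonal(w) * X) \ (X' * (w .* r))   # direct weighted least-squares solve
@assert δ1 ≈ δ2
```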
-mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred
+mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred
    X::M                           # model matrix
+   Xw::M                          # weighted model matrix
    Xt::M                          # X'
    beta0::Vector{T}               # base vector for coefficients
    delbeta::Vector{T}             # coefficient increment
    scratchbeta::Vector{T}
    chol::C
    scratch::M
+   wts::W
 end
-function SparsePredChol(X::SparseMatrixCSC{T}) where T
+
+function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T
    chol = cholesky(sparse(I, size(X, 2), size(X,2)))
-   return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X,
+   Xw = isempty(wts) ? similar(X, 0, 0) : sqrt.(wts) .* X
+   return SparsePredChol{eltype(X),typeof(X),typeof(chol),typeof(wts)}(X,
+       Xw,
        X',
        zeros(T, size(X, 2)),
        zeros(T, size(X, 2)),
        zeros(T, size(X, 2)),
        chol,
-       similar(X))
+       similar(X),
+       wts)
 end

-cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X)
+cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1)))
+cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts)

-function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T
+function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C}
    scr = mul!(p.scratch, Diagonal(wt), p.X)
    XtWX = p.Xt*scr
    c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
 end

+function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C}
+   scr = mul!(p.scratch, Diagonal(wt .* p.wts), p.X)
+   XtWX = p.Xt*scr
+   c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
+   p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+end
+
-function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T
+function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C}
    scr = p.scratch = p.X
    XtWX = p.Xt*scr
    c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
 end

+function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C}
+   scr = p.scratch .= p.X .* p.wts
+   XtWX = p.Xt*scr
+   c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
+   p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+end
+
 LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol)
 LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol

@@ -219,8 +286,38 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T
    ipiv = invperm(ch.p)
    res[ipiv, ipiv]
 end
+
 invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2))
-vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true))
+
+vcov(x::LinPredModel) = rmul!(_covm(x.pp), dispersion(x, true))
+
+_covm(pp::LinPred) = invchol(pp)
+
+function _covm(pp::DensePredChol{T,<:Cholesky,<:ProbabilityWeights}) where {T}
+   wts = pp.wts
+   Z = pp.scratchm1 .= pp.X .* wts
+   XtW2X = Z'Z
+   invXtWX = invchol(pp)
+   invXtWX * XtW2X * invXtWX
+end
+
+function _covm(pp::DensePredChol{T,<:CholeskyPivoted,<:ProbabilityWeights}) where {T}
+   rnk = rank(pp.chol)
+   rnk == length(pp.delbeta) ||
+       throw(ArgumentError("the sandwich covariance for `ProbabilityWeights` is not " *
+                           "implemented for rank-deficient models"))
+   wts = pp.wts
+   Z = pp.scratchm1 .= pp.X .* wts
+   XtW2X = Z'Z
+   invXtWX = invchol(pp)
+   invXtWX * XtW2X * invXtWX
+end
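The `_covm` methods for `ProbabilityWeights` implement a sandwich-type covariance, with `invchol(pp)` as the bread and `X'W²X` as the meat (the dispersion factor is applied afterwards in `vcov`). A minimal standalone sketch of the algebra:

```julia
using LinearAlgebra

X = randn(40, 2)
w = rand(40) .+ 0.5

bread = inv(X' * Diagonal(w) * X)   # corresponds to invchol(pp)
meat  = X' * Diagonal(w .^ 2) * X   # Z'Z with Z = w .* X (held in p.scratchm1)
V = bread * meat * bread            # _covm(pp), before scaling by dispersion
@assert isapprox(V, V')             # symmetric, as a covariance matrix should be
```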
 function cor(x::LinPredModel)
    Σ = vcov(x)
@@ -235,28 +332,24 @@ function show(io::IO, obj::LinPredModel)
 end

 modelframe(obj::LinPredModel) = obj.fr
-modelmatrix(obj::LinPredModel) = obj.pp.X
+
+function modelmatrix(obj::LinPredModel; weighted::Bool=false)
+   if !weighted
+       obj.pp.X
+   elseif !isempty(weights(obj))
+       obj.pp.Xw
+   else
+       throw(ArgumentError("`weighted=true` allowed only for weighted models."))
+   end
+end
+
 response(obj::LinPredModel) = obj.rr.y

 fitted(m::LinPredModel) = m.rr.mu
 predict(mm::LinPredModel) = fitted(mm)
 StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula

-residuals(obj::LinPredModel) = residuals(obj.rr)
-
-"""
-    nobs(obj::LinearModel)
-    nobs(obj::GLM)
-
-For linear and generalized linear models, returns the number of rows, or,
-when prior weights are specified, the sum of weights.
-"""
-function nobs(obj::LinPredModel)
-   if isempty(obj.rr.wts)
-       oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y))
-   else
-       sum(obj.rr.wts)
-   end
-end
+residuals(obj::LinPredModel; kwargs...) = residuals(obj.rr; kwargs...)
+weights(obj::LinPredModel) = weights(obj.rr)

 coef(x::LinPred) = x.beta0
 coef(obj::LinPredModel) = coef(obj.pp)
diff --git a/src/lm.jl b/src/lm.jl
index 079bc893..42a419c5 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -7,46 +7,41 @@ Encapsulates the response for a linear model

 - `mu`: current value of the mean response vector or fitted value
 - `offset`: optional offset added to the linear predictor to form `mu`
-- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations
+- `wts`: optional weights for observations (`AbstractWeights`)
 - `y`: observed response vector

 Either or both `offset` and `wts` may be of length 0
 """
-mutable struct LmResp{V<:FPVector} <: ModResp  # response in a linear model
+mutable struct LmResp{V<:FPVector,W<:AbstractWeights{<:Real}} <: ModResp  # response in a linear model
    mu::V      # mean response
    offset::V  # offset added to linear predictor (may have length 0)
-   wts::V     # prior weights (may have length 0)
+   wts::W     # prior weights (may have length 0)
    y::V       # response
-   function LmResp{V}(mu::V, off::V, wts::V, y::V) where V
+   function LmResp{V,W}(mu::V, off::V, wts::W, y::V) where {V,W}
        n = length(y)
        length(mu) == n || error("mismatched lengths of mu and y")
        ll = length(off)
        ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0")
        ll = length(wts)
        ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0")
-       new{V}(mu, off, wts, y)
+       new{V,W}(mu, off, wts, y)
    end
 end

-function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing)
+function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights{<:Real})
    # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
    _y = convert(Vector{float(eltype(y))}, y)
-   _wts = if wts === nothing
-       similar(_y, 0)
-   else
-       convert(Vector{float(eltype(wts))}, wts)
-   end
-   return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y)
+   return LmResp{typeof(_y),typeof(wts)}(zero(_y), zero(_y), wts, _y)
 end

-function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector
+function updateμ!(r::LmResp{V,W}, linPr::V) where {V<:FPVector,W}
    n = length(linPr)
    length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))")
    length(r.offset) == 0 ? copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset)
    deviance(r)
 end

-updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr)))
+updateμ!(r::LmResp{V,W}, linPr) where {V<:FPVector,W} = updateμ!(r, convert(V, vec(linPr)))

 function deviance(r::LmResp)
    y = r.y
@@ -70,7 +65,25 @@ function loglikelihood(r::LmResp)
    -n/2 * (log(2π * deviance(r)/n) + 1)
 end

-residuals(r::LmResp) = r.y - r.mu
+function nullloglikelihood(r::LmResp)
+   n = isempty(r.wts) ? length(r.y) : sum(r.wts)
+   -n/2 * (log(2π * nulldeviance(r)/n) + 1)
+end
+
+function residuals(r::LmResp; weighted::Bool=false)
+   wts = weights(r)
+   res = r.y - r.mu
+   if !weighted
+       res
+   elseif !isempty(wts)
+       sqrt.(wts) .* res
+   else
+       throw(ArgumentError("`weighted=true` allowed only for weighted models."))
+   end
+end
+
+weights(r::LmResp) = r.wts

 """
    LinearModel

@@ -90,12 +103,8 @@ end
 LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp)

 function StatsBase.fit!(obj::LinearModel)
-   if isempty(obj.rr.wts)
-       delbeta!(obj.pp, obj.rr.y)
-   else
-       delbeta!(obj.pp, obj.rr.y, obj.rr.wts)
-   end
-   installbeta!(obj.pp)
+   delbeta!(obj.pp, obj.rr.y)
+   installbeta!(obj.pp)
    updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y))))
    return obj
 end

@@ -108,12 +117,15 @@ const FIT_LM_DOC = """
    in columns (including if appropriate the intercept), and `y` must be a vector holding
    values of the dependent variable.

-   The keyword argument `wts` can be a `Vector` specifying frequency weights for observations.
-   Such weights are equivalent to repeating each observation a number of times equal
-   to its weight. Do note that this interpretation gives equal point estimates but
-   different standard errors from analytical (a.k.a. inverse variance) weights and
-   from probability (a.k.a. sampling) weights which are the default in some other
-   software.
+   The keyword argument `wts` can be an `AbstractWeights` specifying the weights of observations.
+   The allowed weight types are:
+   - `AnalyticWeights`: describe a non-random relative importance (usually between 0 and 1)
+     for each observation.
+   - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed.
+   - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation,
+     providing a correction mechanism for under- or over-sampling certain population groups.
+   The three types give the same point estimates but different standard errors.
+   If a plain vector is passed (deprecated), it is coerced to `FrequencyWeights`.

    `dropcollinear` controls whether or not `lm` accepts a model matrix which is
    less-than-full rank. If `true` (the default), only the first of each set of
@@ -133,14 +145,25 @@ $FIT_LM_DOC
 """
 function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real},
             allowrankdeficient_dep::Union{Bool,Nothing}=nothing;
-            wts::AbstractVector{<:Real}=similar(y, 0),
+            wts::AbstractVector{<:Real}=uweights(length(y)),
             dropcollinear::Bool=true)
    if allowrankdeficient_dep !== nothing
        @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " *
              "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
        dropcollinear = allowrankdeficient_dep
    end
-   fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear)))
+   # For backward compatibility accept wts as AbstractVector and coerce it to FrequencyWeights
+   _wts = if isa(wts, AbstractWeights)
+       wts
+   elseif isa(wts, AbstractVector)
+       Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " *
+                    "AnalyticWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
+                    "by coercing wts to `FrequencyWeights`", :fit)
+       fweights(wts)
+   else
+       throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
+   end
+   fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts)))
 end
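To make the documented behavior concrete — identical point estimates, weight-type-dependent standard errors — an illustrative fit on made-up data (the engel-based tests below check exact values):

```julia
using GLM, StatsBase, DataFrames

df = DataFrame(x = randn(100))
df.y = 2 .* df.x .+ randn(100)
w = rand(1:5, 100)

m_f = lm(@formula(y ~ x), df; wts = fweights(w))
m_a = lm(@formula(y ~ x), df; wts = aweights(w))
m_p = lm(@formula(y ~ x), df; wts = pweights(w))

coef(m_f) ≈ coef(m_a) ≈ coef(m_p)   # true  — same point estimates
stderror(m_f) == stderror(m_a)      # false — standard errors differ by weight type
```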
""" function StatsBase.cooksdistance(obj::LinearModel) - u = residuals(obj) - mse = dispersion(obj,true) + wts = weights(obj) + u = residuals(obj; weighted=!isempty(wts)) + mse = GLM.dispersion(obj,true) k = dof(obj)-1 d_res = dof_residual(obj) - X = modelmatrix(obj) - XtX = crossmodelmatrix(obj) + X = modelmatrix(obj; weighted=!isempty(wts)) + XtX = crossmodelmatrix(obj; weighted=!isempty(wts)) k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) - wts = obj.rr.wts - if isempty(wts) - hii = diag(X * inv(XtX) * X') - else - throw(ArgumentError("Weighted models are not currently supported.")) - end + hii = diag(X * inv(XtX) * X') D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end diff --git a/src/scratch.jl b/src/scratch.jl new file mode 100644 index 00000000..4f2612b0 --- /dev/null +++ b/src/scratch.jl @@ -0,0 +1,77 @@ +using GLM +using DataFrames +using Random +using CSV +using StatsBase +using RDatasets +Random.seed!(11) + +y = rand(10) +x = rand(10,2) +wts = rand(10) +df = DataFrame(x, :auto) +df.y = y +df.wts = wts +lm1 = lm(x,y) +lmw = lm(x,y; wts = wts) +lmf = lm(@formula(y~x1+x2-1), df) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = aweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) + +glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) + +cooksdistance(lm1) + + + +df = dataset("quantreg", "engel") +N = nrow(df) +df.weights = repeat(1:5, Int(N/5)) +f = @formula(FoodExp ~ Income) +lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) +glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + +lm_model = lm(f, df, wts = df.weights) +glm_model = glm(f, df, Normal(), wts = df.weights) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + + +lm_model = lm(f, df, wts = aweights(df.weights)) +glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 
diff --git a/test/runtests.jl b/test/runtests.jl
index 447798b2..b85ee5c8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -83,12 +83,13 @@ end
 end

 @testset "linear model with weights" begin
+   df = dataset("quantreg", "engel")
    N = nrow(df)
    df.weights = repeat(1:5, Int(N/5))
    f = @formula(FoodExp ~ Income)
-   lm_model = lm(f, df, wts = df.weights)
-   glm_model = glm(f, df, Normal(), wts = df.weights)
+   lm_model = lm(f, df, wts = FrequencyWeights(df.weights))
+   glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights))
    @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
    @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
    @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
@@ -101,6 +102,29 @@ end
    @test isapprox(loglikelihood(glm_model), -4353.946729075838)
    @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
    @test isapprox(mean(residuals(lm_model)), -5.412966629787718)
+
+   lm_model = lm(f, df, wts = df.weights)
+   glm_model = glm(f, df, Normal(), wts = df.weights)
+   @test isa(weights(lm_model), FrequencyWeights)
+   @test isa(weights(glm_model), FrequencyWeights)
+
+   lm_model = lm(f, df, wts = aweights(df.weights))
+   glm_model = glm(f, df, Normal(), wts = aweights(df.weights))
+   @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
+   @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
+   @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842])
+   @test isapprox(r2(lm_model), 0.8330258148644486)
+   @test isapprox(adjr2(lm_model), 0.8323091874604334)
+   @test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907;
+                                   -0.20434035947652907 0.00020126512195323495])
+   @test isapprox(first(predict(lm_model)), 357.57694841780994)
+   @test isapprox(loglikelihood(lm_model), -4353.946729075838)
+   @test isapprox(loglikelihood(glm_model), -4353.946729075838)
+   @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
+   @test isapprox(mean(residuals(lm_model)), -5.412966629787718)
 end

 @testset "rankdeficient" begin
@@ -142,6 +166,16 @@ end
    @test isapprox(coef(m2p_dep_pos_kw), coef(m2p))
 end

+@testset "Passing wts (depwarn)" begin
+   df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3])
+   @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " *
+                      "AnalyticWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
+                      "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts)
Proceeding " * + "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=wts) +end + @testset "saturated linear model" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df) @@ -475,10 +509,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink" begin +@testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + wts=fweights(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -489,8 +523,25 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], @test isapprox(coef(gm14), [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) end + end +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin + for distr in (Binomial, Bernoulli) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=aweights(admit_agr.count)) + @test dof(gm14) == 4 + @test nobs(gm14) == 8 + @test isapprox(deviance(gm14), 474.9667184280627) + @test isapprox(loglikelihood(gm14), -237.48335921403134) + @test isapprox(aic(gm14), 482.96671842822883) + @test isapprox(aicc(gm14), 496.3000517613874) + @test isapprox(bic(gm14), 483.28448459477346) + @test isapprox(coef(gm14), + [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) + end + +end # Logistic regression using aggregated data with proportions of successes and weights admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1:4)], [:count, :admit, :rank]) @@ -499,7 +550,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error @testset "Binomial LogitLink aggregated" begin gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - wts=admit_agr2.count) + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -514,7 +565,7 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights" begin gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -529,7 +580,7 @@ end # Weighted Poisson example (weights are totally made up) @testset "Poisson LogLink Weights" begin gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -686,6 +737,37 @@ end end end +@testset "Sparse LM (weighted)" begin + rng = StableRNG(1) + X = sprand(rng, 1000, 10, 0.01) + β = randn(rng, 10) + y = Bool[rand(rng) < logistic(x) for x in X * β] + wts = rand(1000) + gmsparsev = [fit(LinearModel, X, y; wts=fweights(wts)), + fit(LinearModel, X, sparse(y); wts=fweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=fweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=fweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), 
deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end + + gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), + fit(LinearModel, X, sparse(y); wts=aweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end +end + @testset "Predict" begin rng = StableRNG(123) X = rand(rng, 10, 2) @@ -1201,14 +1283,14 @@ end glm4 = glm(view(x, :, :), view(y, :), Binomial()) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), wts=w) - glm6 = glm(x, view(y, :), Binomial(), wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w) - glm9 = glm(x, y, Binomial(), wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :)) + glm5 = glm(x, y, Binomial(), wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w)) + glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(), wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end
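Finally, a sketch of the end-to-end effect of `ProbabilityWeights`: point estimates match the analytic-weights fit, while `vcov` and `stderror` go through the sandwich path added in `linpred.jl` (illustrative data, not a test fixture):

```julia
using GLM, StatsBase

X = randn(200, 2)
y = X * [0.5, -1.0] .+ randn(200)
w = 1 .+ 4 .* rand(200)

m_a = lm(X, y; wts = aweights(w))
m_p = lm(X, y; wts = pweights(w))

coef(m_a) ≈ coef(m_p)         # true — same point estimates
stderror(m_a), stderror(m_p)  # differ: model-based vs sandwich-type errors
```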