From 1754cbd9ce7265e5c8426a4a1630f70145a25220 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 10 Jun 2022 20:53:44 +0200 Subject: [PATCH 001/106] WIP --- src/scratch.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/scratch.jl diff --git a/src/scratch.jl b/src/scratch.jl new file mode 100644 index 00000000..561ecb22 --- /dev/null +++ b/src/scratch.jl @@ -0,0 +1,20 @@ +using GLM +using DataFrames + +y = rand(10) +x = rand(10,2) +wts = rand(10) +df = DataFrame(x, :auto) +df.y = y +df.wts = wts +lm1 = lm(x,y) +lmw = lm(x,y; wts = wts) +lmf = lm(@formula(y~x1+x2), df) +lmfw = lm(@formula(y~x1+x2), df; wts = wts) +glm(x, y) + + +cooksdistance(lm) + + + From 1d778a5b37013fc3333ba7b9f43b5a25ece29f23 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 15 Jun 2022 19:04:42 +0200 Subject: [PATCH 002/106] WIP --- src/GLM.jl | 26 +++++++------- src/glmfit.jl | 58 ++++++++++++++++++++---------- src/linpred.jl | 86 ++++++++++++++++++++++++++------------------- src/lm.jl | 77 ++++++++++++++++++++++++---------------- src/scratch.jl | 65 +++++++++++++++++++++++++++++++--- test/runtests.jl | 91 +++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 294 insertions(+), 109 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index 019f80e3..8625a48b 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -12,14 +12,14 @@ module GLM import Statistics: cor import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, - fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue + fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights import StatsFuns: xlogy import SpecialFunctions: erfc, erfcinv, digamma, trigamma import StatsModels: hasintercept export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, 
predict, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², - cooksdistance, hasintercept + cooksdistance, hasintercept, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights export # types @@ -52,17 +52,17 @@ module GLM LinearModel, # functions - canonicallink, # canonical link function for a distribution - deviance, # deviance of fitted and observed responses - devresid, # vector of squared deviance residuals - formula, # extract the formula from a model - glm, # general interface - linpred, # linear predictor - lm, # linear model - negbin, # interface to fitting negative binomial regression - nobs, # total number of observations - predict, # make predictions - ftest # compare models with an F test + canonicallink, # canonical link function for a distribution + deviance, # deviance of fitted and observed responses + devresid, # vector of squared deviance residuals + formula, # extract the formula from a model + glm, # general interface + linpred, # linear predictor + lm, # linear model + negbin, # interface to fitting negative binomial regression + nobs, # total number of observations + predict, # make predictions + ftest # compare models with an F test const FP = AbstractFloat const FPVector{T<:FP} = AbstractArray{T,1} diff --git a/src/glmfit.jl b/src/glmfit.jl index 0c108296..942ffc18 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -3,7 +3,7 @@ The response vector and various derived vectors in a generalized linear model. """ -struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp +struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights{<:Real}} <: ModResp "`y`: response vector" y::V d::D @@ -18,14 +18,14 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0" offset::V "`wts:` prior case weights. Can be of length 0." 
- wts::V + wts::W "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V "`wrkresid`: working residuals for IRLS" wrkresid::V end -function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L} +function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector, D, L, W} n = length(y) nη = length(η) nμ = length(μ) @@ -48,14 +48,23 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec throw(DimensionMismatch("offset must have length $n or length 0 but was $lo")) end - return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) + return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractVector{<:Real}) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) - _wts = convert(Vector{float(eltype(wts))}, wts) + _wts = if wts === nothing + ## This should be removed - here for allowing + ## passing a vector (deprecated) + aweights(similar(_y, 0)) + elseif isa(wts, AbstractWeights) + wts + elseif isa(wts, AbstractVector) + ## for backward compatibility + fweights(wts) + end η = similar(_y) μ = similar(_y) r = GlmResp(_y, d, l, η, μ, _off, _wts) @@ -64,13 +73,12 @@ function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVe return r end -function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, - wts::AbstractVector{<:Real}) where {D, L} - GlmResp(float(y), d, l, float(off), float(wts)) +function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractVector{<:Real}) where {D, L} + GlmResp(float(y), d, l, float(off), wts) end deviance(r::GlmResp) = 
sum(r.devresid) - +weights(r::GlmResp) = r.wts """ cancancel(r::GlmResp{V,D,L}) @@ -374,7 +382,7 @@ function StatsBase.fit!(m::AbstractGLM; if haskey(kwargs, :tol) Base.depwarn("`tol` argument is deprecated, use `atol` and `rtol` instead", :fit!) rtol = kwargs[:tol] - end + end _fit!(m, verbose, maxiter, minstepfac, atol, rtol, start) end @@ -440,12 +448,9 @@ const FIT_GLM_DOC = """ # Keyword Arguments - `dofit::Bool=true`: Determines whether model will be fit - - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations. - Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. + - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. + Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. + If a vector is passed (deprecated) it is coerced to FrequencyWeights. Can be length 0 to indicate no weighting (default). - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`. Can be of length 0 @@ -476,7 +481,7 @@ function fit(::Type{M}, d::UnivariateDistribution, l::Link = canonicallink(d); dofit::Bool = true, - wts::AbstractVector{<:Real} = similar(y, 0), + wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}} = aweights(similar(y, 0)), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) 
where {M<:AbstractGLM} @@ -537,6 +542,7 @@ function dispersion(m::AbstractGLM, sqr::Bool=false) end end + """ predict(mm::AbstractGLM, newX::AbstractMatrix; offset::FPVector=eltype(newX)[], interval::Union{Symbol,Nothing}=nothing, level::Real = 0.95, @@ -649,3 +655,19 @@ function checky(y, d::Binomial) end return nothing end + +""" + nobs(obj::LinearModel) + nobs(obj::GLM) + +For linear and generalized linear models, returns the number of rows, or, +when prior weights of type FrequencyWeights are specified, the sum of weights. +""" +nobs(obj::LinPredModel) = nobs(obj.rr) + +nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum + +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) + diff --git a/src/linpred.jl b/src/linpred.jl index 4274f575..553fb503 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,5 +1,5 @@ """ - linpred!(out, p::LinPred, f::Real=1.0) +linpred!(out, p::LinPred, f::Real=1.0) Overwrite `out` with the linear predictor from `p` with factor `f` @@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.) end """ - linpred(p::LinPred, f::Real=1.0) +linpred(p::LinPred, f::Real=1.0) Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)` """ linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f) """ - installbeta!(p::LinPred, f::Real=1.0) +installbeta!(p::LinPred, f::Real=1.0) Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`. Return the updated `p.beta0`. """ @@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.) 
end """ - DensePredQR +DensePredQR A `LinPred` type with a dense, unpivoted QR decomposition of `X` @@ -66,7 +66,7 @@ DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) """ - delbeta!(p::LinPred, r::Vector) +delbeta!(p::LinPred, r::Vector) Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r` """ @@ -78,7 +78,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal end """ - DensePredChol{T} +DensePredChol{T} A `LinPred` type with a dense Cholesky factorization of `X'X` @@ -106,12 +106,12 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool) T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - similar(X, T), - similar(cholfactors(F))) + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + similar(X, T), + similar(cholfactors(F))) end cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) @@ -177,12 +177,12 @@ end function SparsePredChol(X::SparseMatrixCSC{T}) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X)) + X', + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + chol, + similar(X)) end cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) @@ -220,6 +220,7 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T res[ipiv, ipiv] end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) +## For ProbabilityWeights the variance is diferent vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) function cor(x::LinPredModel) @@ -235,28 +236,42 @@ 
function show(io::IO, obj::LinPredModel) end modelframe(obj::LinPredModel) = obj.fr -modelmatrix(obj::LinPredModel) = obj.pp.X + +function modelmatrix(obj::LinPredModel; weighted=false) + wts = weights(obj) + X = obj.pp.X + if !weighted + X + elseif !isempty(wts) + wts_times_X(X, wts) + else + throw(ArgumentError("`weighted=true` allowed only for weighted models.")) + end +end + +function wts_times_X(X::AbstractSparseMatrix, wts::AbstractArray) + Z = copy(X) + rows = rowvals(Z) + vals = nonzeros(Z) + m, n = size(Z) + for j = 1:n + for i in nzrange(Z, j) + r = rows[i] + vals[i] *= sqrt(wts[r]) + end + end + return Z +end + +wts_times_X(X::AbstractMatrix, wts::AbstractArray) = sqrt.(wts).*X + response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu predict(mm::LinPredModel) = fitted(mm) StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula -residuals(obj::LinPredModel) = residuals(obj.rr) - -""" - nobs(obj::LinearModel) - nobs(obj::GLM) - -For linear and generalized linear models, returns the number of rows, or, -when prior weights are specified, the sum of weights. -""" -function nobs(obj::LinPredModel) - if isempty(obj.rr.wts) - oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y)) - else - sum(obj.rr.wts) - end -end +residuals(obj::LinPredModel; kwarg...) = residuals(obj.rr; kwarg...) 
+weights(obj::LinPredModel) = weights(obj.rr) coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) @@ -264,3 +279,4 @@ coef(obj::LinPredModel) = coef(obj.pp) dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) + \ No newline at end of file diff --git a/src/lm.jl b/src/lm.jl index ed11f450..994ac895 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -7,46 +7,48 @@ Encapsulates the response for a linear model - `mu`: current value of the mean response vector or fitted value - `offset`: optional offset added to the linear predictor to form `mu` -- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations +- `wts`: optional weights for observations (AbstractWeights) - `y`: observed response vector Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) - wts::V # prior weights (may have length 0) + wts::W # prior weights (may have length 0) y::V # response - function LmResp{V}(mu::V, off::V, wts::V, y::V) where V + function LmResp{V, W}(mu::V, off::V, wts::W, y::V) where {V, W} n = length(y) length(mu) == n || error("mismatched lengths of mu and y") ll = length(off) ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0") ll = length(wts) ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0") - new{V}(mu, off, wts, y) + new{V,W}(mu, off, wts, y) end end -function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing) +function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing) # Instead of convert(Vector{Float64}, y) to be more 
ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) - _wts = if wts === nothing - similar(_y, 0) + _wts = if wts === nothing + aweights(similar(_y, 0)) + elseif isa(wts, Vector) + fweights(wts) else - convert(Vector{float(eltype(wts))}, wts) + wts end - return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y) + return LmResp{typeof(_y), typeof(_wts)}(zero(_y), zero(_y), _wts, _y) end -function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector +function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W} n = length(linPr) length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))") length(r.offset) == 0 ? copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset) deviance(r) end -updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr))) +updateμ!(r::LmResp{V, W}, linPr) where {V<:FPVector, W} = updateμ!(r, convert(V, vec(linPr))) function deviance(r::LmResp) y = r.y @@ -97,7 +99,19 @@ function nullloglikelihood(r::LmResp) -n/2 * (log(2π * nulldeviance(r)/n) + 1) end -residuals(r::LmResp) = r.y - r.mu +function residuals(r::LmResp; weighted=false) + wts = weights(r) + res = r.y - r.mu + if !weighted + res + elseif !isempty(wts) + sqrt.(wts).*res + else + throw(ArgumentError("`weighted=true` allowed only for weighted models.")) + end +end + +weights(r::LmResp) = r.wts """ LinearModel @@ -120,7 +134,7 @@ function StatsBase.fit!(obj::LinearModel) if isempty(obj.rr.wts) delbeta!(obj.pp, obj.rr.y) else - delbeta!(obj.pp, obj.rr.y, obj.rr.wts) + delbeta!(obj.pp, obj.rr.y, convert(Vector{eltype(obj.rr.y)}, obj.rr.wts)) end installbeta!(obj.pp) updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) @@ -135,12 +149,15 @@ const FIT_LM_DOC = """ in columns (including if appropriate the intercept), and `y` must be a vector holding values of the dependent variable. - The keyword argument `wts` can be a `Vector` specifying frequency weights for observations. 
- Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. + The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations. + Weights allowed are: + - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1) + for each observation. + - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed. + - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups + These weights gives equal point estimates but different standard errors. + If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. `dropcollinear` controls whether or not `lm` accepts a model matrix which is less-than-full rank. If `true` (the default), only the first of each set of @@ -166,6 +183,9 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. 
Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep + end + if isa(wts, Vector) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit) end fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear))) end @@ -206,6 +226,7 @@ nullloglikelihood(obj::LinearModel) = nullloglikelihood(obj.rr) r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) + function adjr2(obj::LinearModel) n = nobs(obj) # dof() includes the dispersion parameter @@ -299,19 +320,15 @@ of each data point. Currently only implemented for linear models without weights. """ function StatsBase.cooksdistance(obj::LinearModel) - u = residuals(obj) - mse = dispersion(obj,true) + wts = weights(obj) + u = residuals(obj; weighted=!isempty(wts)) + mse = GLM.dispersion(obj,true) k = dof(obj)-1 d_res = dof_residual(obj) - X = modelmatrix(obj) - XtX = crossmodelmatrix(obj) + X = modelmatrix(obj; weighted=!isempty(wts)) + XtX = crossmodelmatrix(obj; weighted=!isempty(wts)) k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) - wts = obj.rr.wts - if isempty(wts) - hii = diag(X * inv(XtX) * X') - else - throw(ArgumentError("Weighted models are not currently supported.")) - end + hii = diag(X * inv(XtX) * X') D = @. 
u^2 * (hii / (1 - hii)^2) / (k*mse) return D end diff --git a/src/scratch.jl b/src/scratch.jl index 561ecb22..4f2612b0 100644 --- a/src/scratch.jl +++ b/src/scratch.jl @@ -1,5 +1,10 @@ using GLM using DataFrames +using Random +using CSV +using StatsBase +using RDatasets +Random.seed!(11) y = rand(10) x = rand(10,2) @@ -9,12 +14,64 @@ df.y = y df.wts = wts lm1 = lm(x,y) lmw = lm(x,y; wts = wts) -lmf = lm(@formula(y~x1+x2), df) -lmfw = lm(@formula(y~x1+x2), df; wts = wts) -glm(x, y) +lmf = lm(@formula(y~x1+x2-1), df) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = aweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) +glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) -cooksdistance(lm) +cooksdistance(lm1) +df = dataset("quantreg", "engel") +N = nrow(df) +df.weights = repeat(1:5, Int(N/5)) +f = @formula(FoodExp ~ Income) +lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) +glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + +lm_model = lm(f, df, wts = df.weights) +glm_model = glm(f, df, Normal(), wts = df.weights) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test 
isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + + +lm_model = lm(f, df, wts = aweights(df.weights)) +glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.8323091874604334) +@test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; + -0.20434035947652907 0.00020126512195323495]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index e38c10f4..90bd33a0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -83,12 +83,13 @@ end end @testset "linear model with weights" begin + df = dataset("quantreg", "engel") N = nrow(df) df.weights = repeat(1:5, Int(N/5)) f = @formula(FoodExp ~ Income) - lm_model = lm(f, df, wts = df.weights) - glm_model = glm(f, df, Normal(), wts = df.weights) 
+ lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) + glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) @@ -101,6 +102,29 @@ end @test isapprox(loglikelihood(glm_model), -4353.946729075838) @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) @test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + lm_model = lm(f, df, wts = df.weights) + glm_model = glm(f, df, Normal(), wts = df.weights) + @test isa(weights(lm_model), FrequencyWeights) + @test isa(weights(glm_model), FrequencyWeights) + + + + + lm_model = lm(f, df, wts = aweights(df.weights)) + glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) + @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) + @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) + @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) + @test isapprox(r2(lm_model), 0.8330258148644486) + @test isapprox(adjr2(lm_model), 0.8323091874604334) + @test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; + -0.20434035947652907 0.00020126512195323495]) + @test isapprox(first(predict(lm_model)), 357.57694841780994) + @test isapprox(loglikelihood(lm_model), -4353.946729075838) + @test isapprox(loglikelihood(glm_model), -4353.946729075838) + @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) + @test isapprox(mean(residuals(lm_model)), -5.412966629787718) end @testset "rankdeficient" begin @@ -128,8 +152,9 @@ end @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:]) m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true) - @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * - "argument `dropcollinear` instead. 
Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) + @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: true") (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.") fit(LinearModel, Xmissingcell, ymissingcell, true) + # @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * + # "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @@ -407,10 +432,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink" begin +@testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + wts=fweights(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -421,8 +446,25 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], @test isapprox(coef(gm14), [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) end + end +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin + for distr in (Binomial, Bernoulli) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=aweights(admit_agr.count)) + @test dof(gm14) == 4 + @test nobs(gm14) == 8 + @test isapprox(deviance(gm14), 474.9667184280627) + @test isapprox(loglikelihood(gm14), 
-237.48335921403134) + @test isapprox(aic(gm14), 482.96671842822883) + @test isapprox(aicc(gm14), 496.3000517613874) + @test isapprox(bic(gm14), 483.28448459477346) + @test isapprox(coef(gm14), + [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) + end + +end # Logistic regression using aggregated data with proportions of successes and weights admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1:4)], [:count, :admit, :rank]) @@ -431,7 +473,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error @testset "Binomial LogitLink aggregated" begin gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - wts=admit_agr2.count) + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -446,7 +488,7 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights" begin gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -461,7 +503,7 @@ end # Weighted Poisson example (weights are totally made up) @testset "Poisson LogLink Weights" begin gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -618,6 +660,37 @@ end end end +@testset "Sparse LM (weighted)" begin + rng = StableRNG(1) + X = sprand(rng, 1000, 10, 0.01) + β = randn(rng, 10) + y = Bool[rand(rng) < logistic(x) for x in X * β] + wts = rand(1000) + gmsparsev = [fit(LinearModel, X, y; wts=fweights(wts)), + fit(LinearModel, X, sparse(y); wts=fweights(wts)), + 
fit(LinearModel, Matrix(X), sparse(y); wts=fweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=fweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end + + gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), + fit(LinearModel, X, sparse(y); wts=aweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end +end + @testset "Predict" begin rng = StableRNG(123) X = rand(rng, 10, 2) From 12121a31b2a3642f065f2e0e152a2de3ef8bd30a Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 15 Jun 2022 19:20:30 +0200 Subject: [PATCH 003/106] WIP --- src/glmfit.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 942ffc18..e9ad3f6c 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -666,8 +666,8 @@ when prior weights of type FrequencyWeights are specified, the sum of weights. nobs(obj::LinPredModel) = nobs(obj.rr) nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) -nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum +nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum -nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? 
oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) From 4363ba49364a080d3e388698cf50f6a7d8f995fb Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 17 Jun 2022 15:48:42 +0200 Subject: [PATCH 004/106] Taking weights seriously --- src/linpred.jl | 104 +++++++++++++++++++++++++------------------------ src/lm.jl | 23 ++++++----- 2 files changed, 67 insertions(+), 60 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 553fb503..972489e1 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -45,24 +45,33 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `qr`: a `QRCompactWY` object created from `X`, with optional row weights. """ -mutable struct DensePredQR{T<:BlasReal} <: DensePred +mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred X::Matrix{T} # model matrix + Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::QRCompactWY{T} - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T + wts::W + function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X)) + (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) + Xw = isempty(_wt) ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X + qrX = isempty(_wts) ? 
qr(X) : qr(Xw) + new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts) end - function DensePredQR{T}(X::Matrix{T}) where T + function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W} n, p = size(X) - new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X)) + DensePredQR(X, zeros(T, p), wts) + end + function DensePredQR(X::Matrix{T}) where T + n, p = size(X) + DensePredQR{T}(X, zeros(T, p), uweights(0)) end end -DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0) -DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2))) +DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts) +DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) """ @@ -92,29 +101,34 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchm1`: scratch Matrix{T} of the same size as `X` - `scratchm2`: scratch Matrix{T} os the same size as `X'X` """ -mutable struct DensePredChol{T<:BlasReal,C} <: DensePred +mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePred X::Matrix{T} # model matrix + Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} - chol::C + wts::W + chol::C scratchm1::Matrix{T} scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool) - F = Hermitian(float(X'X)) +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}=uweights(0)) + Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X + F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw)) T = eltype(F) F = pivot ? 
pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), + Matrix{T}(Xw), zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), + wts, F, similar(X, T), similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) +cholpred(X::AbstractMatrix, pivot::Bool, wts) = DensePredChol(X, pivot, wts) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -131,9 +145,10 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasRea p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal ch = p.chol - delbeta = mul!(p.delbeta, adjoint(p.X), r) + Z = isempty(p.wts) ? p.X : p.Xw + delbeta = mul!(p.delbeta, adjoint(Z), r) rnk = rank(ch) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -148,36 +163,43 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<: p end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - scr = mul!(p.scratchm1, Diagonal(wt), p.X) - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + Z = isempty(p.wts) ? X : Xw + scr = mul!(p.scratchm1, Diagonal(wt), Z) + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), Z), :U)) mul!(p.delbeta, transpose(scr), r) ldiv!(p.chol, p.delbeta) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + Z = isempty(p.wts) ? 
p.X : p.Xw cf = cholfactors(p.chol) piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] + cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), Z)), Z)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) p end -mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred +mutable struct SparsePredChol{T,W<:AbstractWeights{<:Real},M<:SparseMatrixCSC,C} <: GLM.LinPred X::M # model matrix + Xw::M # weighted model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} + wts::W chol::C scratch::M end -function SparsePredChol(X::SparseMatrixCSC{T}) where T +function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) + sqrtwts = sqrt.(wts) + Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, - X', + Xw, + isempty(wts) ? X' : Xw', zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), @@ -185,13 +207,14 @@ function SparsePredChol(X::SparseMatrixCSC{T}) where T similar(X)) end -cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) +cholpred(X::SparseMatrixCSC, pivot::Bool=false, wts::AbstractVector=uweights(0)) = SparsePredChol(X, wts) function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T - scr = mul!(p.scratch, Diagonal(wt), p.X) - XtWX = p.Xt*scr + Z = isempty(p.wts) ? 
X : Xw + #scr = mul!(p.scratch, Diagonal(wt), Z) + XtWX = p.Xt*Z c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) - p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) + p.delbeta = c \ mul!(p.delbeta, adjoint(Z), r) end function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T @@ -237,34 +260,16 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted=false) - wts = weights(obj) - X = obj.pp.X - if !weighted - X - elseif !isempty(wts) - wts_times_X(X, wts) +function modelmatrix(obj::LinPredModel; weighted=false) + if !weighted + obj.pp.X + elseif !isempty(weights(obj)) + obj.pp.Xw else throw(ArgumentError("`weighted=true` allowed only for weighted models.")) end end -function wts_times_X(X::AbstractSparseMatrix, wts::AbstractArray) - Z = copy(X) - rows = rowvals(Z) - vals = nonzeros(Z) - m, n = size(Z) - for j = 1:n - for i in nzrange(Z, j) - r = rows[i] - vals[i] *= sqrt(wts[r]) - end - end - return Z -end - -wts_times_X(X::AbstractMatrix, wts::AbstractArray) = sqrt.(wts).*X - response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu @@ -279,4 +284,3 @@ coef(obj::LinPredModel) = coef(obj.pp) dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) - \ No newline at end of file diff --git a/src/lm.jl b/src/lm.jl index 994ac895..89ba41c8 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -30,15 +30,8 @@ end function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly - _y = convert(Vector{float(eltype(y))}, y) - _wts = if wts === nothing - aweights(similar(_y, 0)) - elseif isa(wts, Vector) - fweights(wts) - else - wts - end - return LmResp{typeof(_y), typeof(_wts)}(zero(_y), zero(_y), _wts, _y) + _y = convert(Vector{float(eltype(y))}, y) + return LmResp{typeof(_y), 
typeof(wts)}(zero(_y), zero(_y), wts, _y) end function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W} @@ -187,7 +180,17 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< if isa(wts, Vector) Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit) end - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear))) + _wts = if wts === nothing + uweights(0) + elseif isa(wts, AbstractWeights) + wts + elseif isa(wts, AbstractVector) + fweights(wts) + else + throw(ArgumentError("`wts` should be an AbstractVector coercible to an AbstractWeights")) + end + + fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) end """ From ca702dcda769bf137ebcf1b86a9c0501cc75e49d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 18 Jun 2022 13:07:07 +0200 Subject: [PATCH 005/106] WIP --- src/glmfit.jl | 4 ++-- src/linpred.jl | 52 ++++++++++++++++++++++++++++++++++++++------------ src/lm.jl | 12 +++--------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index e9ad3f6c..760dcb1e 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -481,7 +481,7 @@ function fit(::Type{M}, d::UnivariateDistribution, l::Link = canonicallink(d); dofit::Bool = true, - wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}} = aweights(similar(y, 0)), + wts::AbstractWeights{<:Real}, offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} @@ -491,7 +491,7 @@ function fit(::Type{M}, end rr = GlmResp(y, d, l, offset, wts) - res = M(rr, cholpred(X), false) + res = M(rr, cholpred(X, false, wts), false) return dofit ? fit!(res; fitargs...) 
: res end diff --git a/src/linpred.jl b/src/linpred.jl index 972489e1..b74d4b78 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -112,7 +112,7 @@ mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePr scratchm1::Matrix{T} scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}=uweights(0)) +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}) Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw)) T = eltype(F) @@ -128,7 +128,8 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool, wts) = DensePredChol(X, pivot, wts) +cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) +cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(0)) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -163,22 +164,21 @@ function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Ve p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - Z = isempty(p.wts) ? X : Xw - scr = mul!(p.scratchm1, Diagonal(wt), Z) - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), Z), :U)) - mul!(p.delbeta, transpose(scr), r) +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}) where T<:BlasReal + Z = isempty(p.wts) ? 
p.X : p.Xw + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(Z), Z), :U)) + mul!(p.delbeta, transpose(Z), r) ldiv!(p.chol, p.delbeta) p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal Z = isempty(p.wts) ? p.X : p.Xw cf = cholfactors(p.chol) piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), Z)), Z)[piv, piv] + cf .= mul!(p.scratchm2, adjoint(Z), Z)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) - ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) + ldiv!(p.chol, mul!(p.delbeta, transpose(Z), r)) p end @@ -243,8 +243,36 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T res[ipiv, ipiv] end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) -## For ProbabilityWeights the variance is diferent -vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) + +function vcov(x::LinPredModel) + d = dispersion(x, true) + B = _covm(x.pp) + rmul!(B, dispersion(x, true)) +end + +_covm(pp::DensePredChol{T, W}) where {T,W} = invchol(pp) + +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} + wts = pp.wts + Z = pp.scratchm1 .= pp.X.*wts + XtW2X = Z'Z + invXtWX = invchol(pp) + invXtWX*XtW2X*invXtWX +end + +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} + wts = pp.wts + Z = pp.scratchm1 .= pp.X.*wts + rnk = rank(pp.chol) + p = length(pp.delbeta) + if rnk == p + XtW2X = Z'Z + else + ## no idea + end + invXtWX = invchol(pp) + invXtWX*XtW2X*invXtWX +end function cor(x::LinPredModel) Σ = vcov(x) diff --git a/src/lm.jl b/src/lm.jl index 89ba41c8..32c6a6d7 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -124,11 +124,7 @@ end LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp) function 
StatsBase.fit!(obj::LinearModel) - if isempty(obj.rr.wts) - delbeta!(obj.pp, obj.rr.y) - else - delbeta!(obj.pp, obj.rr.y, convert(Vector{eltype(obj.rr.y)}, obj.rr.wts)) - end + delbeta!(obj.pp, obj.rr.y) installbeta!(obj.pp) updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) return obj @@ -177,17 +173,15 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end - if isa(wts, Vector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.", :fit) - end _wts = if wts === nothing uweights(0) elseif isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights", :fit) fweights(wts) else - throw(ArgumentError("`wts` should be an AbstractVector coercible to an AbstractWeights")) + throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) end fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) From e2b2d1223220bd9641a10e39422314a58de67527 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 21 Jun 2022 17:55:01 +0200 Subject: [PATCH 006/106] Taking weights seriously --- src/glmfit.jl | 46 +++++++++--------- src/linpred.jl | 118 ++++++++++++++++++++++++++++++++--------------- src/lm.jl | 21 +++++---- test/runtests.jl | 31 ++++++++----- 4 files changed, 135 insertions(+), 81 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 760dcb1e..277c82ad 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -41,8 +41,8 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec end # Lengths of wts and off can be either n or 0 - if lw != 0 && lw != n - throw(DimensionMismatch("wts must have length 
$n or length 0 but was $lw")) + if lw != n + throw(DimensionMismatch("wts must have length $n but was $lw")) end if lo != 0 && lo != n throw(DimensionMismatch("offset must have length $n or length 0 but was $lo")) @@ -51,29 +51,19 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractVector{<:Real}) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights{<:Real}) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) - _wts = if wts === nothing - ## This should be removed - here for allowing - ## passing a vector (deprecated) - aweights(similar(_y, 0)) - elseif isa(wts, AbstractWeights) - wts - elseif isa(wts, AbstractVector) - ## for backward compatibility - fweights(wts) - end η = similar(_y) μ = similar(_y) - r = GlmResp(_y, d, l, η, μ, _off, _wts) - initialeta!(r.eta, d, l, _y, _wts, _off) + r = GlmResp(_y, d, l, η, μ, _off, wts) + initialeta!(r.eta, d, l, _y, wts, _off) updateμ!(r, r.eta) return r end -function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractVector{<:Real}) where {D, L} +function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) where {D, L} GlmResp(float(y), d, l, float(off), wts) end @@ -296,7 +286,7 @@ function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real lp = r.mu # Initialize β, μ, and compute deviance - if start == nothing || isempty(start) + if start === nothing || isempty(start) # Compute beta update based on default response value # if no starting values have been passed delbeta!(p, wrkresp(r), r.wrkwt) @@ -481,17 +471,25 @@ function fit(::Type{M}, 
d::UnivariateDistribution, l::Link = canonicallink(d); dofit::Bool = true, - wts::AbstractWeights{<:Real}, - offset::AbstractVector{<:Real} = similar(y, 0), + wts::AbstractVector{<:Real} = uweights(length(y)), + offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} - + println("got you") # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) end - - rr = GlmResp(y, d, l, offset, wts) - res = M(rr, cholpred(X, false, wts), false) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = if isa(wts, AbstractWeights) + wts + elseif isa(wts, AbstractVector) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding by coercing wts to `FrequencyWeights`", :fit) + fweights(wts) + else + throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) + end + rr = GlmResp(y, d, l, offset, _wts) + res = M(rr, cholpred(X, false, _wts), false) return dofit ? fit!(res; fitargs...) : res end @@ -500,7 +498,7 @@ fit(::Type{M}, y::AbstractVector, d::UnivariateDistribution, l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} = - fit(M, float(X), float(y), d, l; kwargs...) + fit(M, float(X), float(y), d, l; kwargs...) """ glm(formula, data, diff --git a/src/linpred.jl b/src/linpred.jl index b74d4b78..3cd46761 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -45,7 +45,7 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `qr`: a `QRCompactWY` object created from `X`, with optional row weights. 
""" -mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred +mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights{<:Real}} <: DensePred X::Matrix{T} # model matrix Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base coefficient vector @@ -53,13 +53,14 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred scratchbeta::Vector{T} qr::QRCompactWY{T} wts::W + wresponse::Vector{T} function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) - Xw = isempty(_wt) ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X - qrX = isempty(_wts) ? qr(X) : qr(Xw) - new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts) + Xw = wts isa UnitWeights ? Matrix{T}(undef, 0, 0) : sqrt.(wts).*X + qrX = wts isa UnitWeights ? 
qr(X) : qr(Xw) + new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) end function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W} n, p = size(X) @@ -70,9 +71,10 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractVector{<:Real}} <: DensePred DensePredQR{T}(X, zeros(T, p), uweights(0)) end end +DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1))) DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts) DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts) -convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2))) +convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X) """ delbeta!(p::LinPred, r::Vector) @@ -99,18 +101,20 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights. - `scratchm1`: scratch Matrix{T} of the same size as `X` -- `scratchm2`: scratch Matrix{T} os the same size as `X'X` +- `scratchm2`: scratch Matrix{T} of the same size as `X'X` +- `scratchv1`: scratch Vector{T} of the same size of `y` """ -mutable struct DensePredChol{T<:BlasReal,W<:AbstractVector{<:Real},C} <: DensePred +mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector{<:Real}} <: DensePred X::Matrix{T} # model matrix Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} + chol::C wts::W - chol::C scratchm1::Matrix{T} scratchm2::Matrix{T} + scratchv1::Vector{T} end function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}) Xw = isempty(wts) ? 
Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X @@ -122,14 +126,16 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), - wts, F, + wts, similar(X, T), - similar(cholfactors(F))) + similar(cholfactors(F)), + similar(X, T, (size(X,1),)) + ) end cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(0)) +cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(size(X,1))) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -141,15 +147,37 @@ function cholesky(p::DensePredChol{T}) where T<:FP end cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:Cholesky, <:UnitWeights}, r::Vector{T}) where T<:BlasReal ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) +end + +function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, r::Vector{T}) where T<:BlasReal + p.scratchv1 .= r.*sqrt(p.wts) + ldiv!(p.chol, mul!(p.delbeta, transpose(p.Xw), p.scratchv1)) +end + +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal + ch = p.chol + delbeta = mul!(p.delbeta, adjoint(p.X), r) + rnk = rank(ch) + if rnk == length(delbeta) + ldiv!(ch, delbeta) + else + permute!(delbeta, ch.p) + for k=(rnk+1):length(delbeta) + delbeta[k] = -zero(T) + end + LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk)) + invpermute!(delbeta, ch.p) + end p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = 
p.chol - Z = isempty(p.wts) ? p.X : p.Xw - delbeta = mul!(p.delbeta, adjoint(Z), r) + Z = p.Xw + p.scratchv1 .= r.*sqrt.(p.wts) + delbeta = mul!(p.delbeta, adjoint(p.Xw), p.scratchv1) rnk = rank(ch) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -164,66 +192,83 @@ function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Ve p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:Cholesky}, r::Vector{T}) where T<:BlasReal - Z = isempty(p.wts) ? p.X : p.Xw - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(Z), Z), :U)) - mul!(p.delbeta, transpose(Z), r) +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal + p.scratchm1 .= wt.*p.X + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(p.scratchm1), p.X), :U)) + mul!(p.delbeta, transpose(p.scratchm1), r) ldiv!(p.chol, p.delbeta) p end -function delbeta!(p::DensePredChol{T,<:AbstractWeights,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal - Z = isempty(p.wts) ? 
p.X : p.Xw +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal cf = cholfactors(p.chol) piv = p.chol.p - cf .= mul!(p.scratchm2, adjoint(Z), Z)[piv, piv] + p.scratchm1 .= wt.*p.X + cf .= mul!(p.scratchm2, adjoint(p.scratchm1), p.X)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) - ldiv!(p.chol, mul!(p.delbeta, transpose(Z), r)) + ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) p end -mutable struct SparsePredChol{T,W<:AbstractWeights{<:Real},M<:SparseMatrixCSC,C} <: GLM.LinPred +mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred X::M # model matrix Xw::M # weighted model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} - wts::W chol::C scratch::M + wts::W end + function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) sqrtwts = sqrt.(wts) Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X - return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, + return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X, Xw, - isempty(wts) ? X' : Xw', + X', zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), chol, - similar(X)) + similar(X), + wts) end -cholpred(X::SparseMatrixCSC, pivot::Bool=false, wts::AbstractVector=uweights(0)) = SparsePredChol(X, wts) +cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1))) +cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts) -function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T - Z = isempty(p.wts) ? 
X : Xw - #scr = mul!(p.scratch, Diagonal(wt), Z) - XtWX = p.Xt*Z +function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} + scr = mul!(p.scratch, Diagonal(wt), p.X) + XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) - p.delbeta = c \ mul!(p.delbeta, adjoint(Z), r) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end -function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T +function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} + scr = mul!(p.scratch, Diagonal(wt.*p.wts), p.X) + XtWX = p.Xt*scr + c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) +end + +function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C} scr = p.scratch = p.X XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end +function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C} + scr = p.scratch .= p.X.*p.wts + XtWX = p.Xt*scr + @show XtWX + c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) +end + LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol @@ -242,6 +287,7 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T ipiv = invperm(ch.p) res[ipiv, ipiv] end + invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) function vcov(x::LinPredModel) @@ -250,7 +296,7 @@ function vcov(x::LinPredModel) rmul!(B, dispersion(x, true)) end -_covm(pp::DensePredChol{T, W}) where {T,W} = invchol(pp) +_covm(pp::LinPred) = invchol(pp) function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} wts = pp.wts diff --git a/src/lm.jl b/src/lm.jl 
index 32c6a6d7..9d742958 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -12,7 +12,7 @@ Encapsulates the response for a linear model Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector, W<:AbstractWeights{<:Real}} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) wts::W # prior weights (may have length 0) @@ -28,9 +28,9 @@ mutable struct LmResp{V<:FPVector, W<:Union{AbstractWeights{<:Real}, AbstractVec end end -function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}, AbstractWeights{<:Real}}=nothing) +function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly - _y = convert(Vector{float(eltype(y))}, y) + _y = convert(Vector{float(eltype(y))}, y) return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y) end @@ -124,8 +124,8 @@ end LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp) function StatsBase.fit!(obj::LinearModel) - delbeta!(obj.pp, obj.rr.y) - installbeta!(obj.pp) + delbeta!(obj.pp, obj.rr.y) + installbeta!(obj.pp) updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) return obj end @@ -166,19 +166,20 @@ $FIT_LM_DOC """ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, allowrankdeficient_dep::Union{Bool,Nothing}=nothing; - wts::AbstractVector{<:Real}=similar(y, 0), + wts::AbstractVector{<:Real}=uweights(length(y)), dropcollinear::Bool=true) if allowrankdeficient_dep !== nothing @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. 
Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end - _wts = if wts === nothing - uweights(0) - elseif isa(wts, AbstractWeights) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights", :fit) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) diff --git a/test/runtests.jl b/test/runtests.jl index 90bd33a0..45663c74 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -152,9 +152,8 @@ end @test all(isnan, hcat(coeftable(m2p).cols[2:end]...)[7,:]) m2p_dep_pos = fit(LinearModel, Xmissingcell, ymissingcell, true) - @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: true") (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights.") fit(LinearModel, Xmissingcell, ymissingcell, true) - # @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * - # "argument `dropcollinear` instead. Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) + @test_logs (:warn, "Positional argument `allowrankdeficient` is deprecated, use keyword " * + "argument `dropcollinear` instead. 
Proceeding with positional argument value: true") fit(LinearModel, Xmissingcell, ymissingcell, true) @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted) @test rank(m2p_dep_pos.pp.chol) == rank(m2p.pp.chol) @test isapprox(deviance(m2p_dep_pos), deviance(m2p)) @@ -167,6 +166,16 @@ end @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end +@testset "Passing wts (depwarn)" begin + df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3]) + @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=wts) + @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=wts) +end + @testset "saturated linear model" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df) @@ -1206,14 +1215,14 @@ end glm4 = glm(view(x, :, :), view(y, :), Binomial()) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), wts=w) - glm6 = glm(x, view(y, :), Binomial(), wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w) - glm9 = glm(x, y, Binomial(), wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :)) + glm5 = glm(x, y, Binomial(), wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w)) + glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(), 
wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end From 84cd9901a6b71fce43446f504624c5727821226f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 22 Jun 2022 12:09:12 +0200 Subject: [PATCH 007/106] Add depwarn for passing wts with Vector --- src/glmfit.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 277c82ad..909d3210 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -474,7 +474,6 @@ function fit(::Type{M}, wts::AbstractVector{<:Real} = uweights(length(y)), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} - println("got you") # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) @@ -483,7 +482,9 @@ function fit(::Type{M}, _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding by coercing wts to `FrequencyWeights`", :fit) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. 
Proceeding " * + "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) From cbc329f9e0e865b18f3a0a35baad2df7aed2a48f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 22 Jun 2022 13:00:39 +0200 Subject: [PATCH 008/106] Cosmettic changes --- src/glmfit.jl | 17 ++++++++--------- src/linpred.jl | 51 +++++++++++++++++++++++++------------------------- src/lm.jl | 21 ++++++++++----------- 3 files changed, 43 insertions(+), 46 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 909d3210..94058ef8 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -17,7 +17,7 @@ struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights mu::V "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0" offset::V - "`wts:` prior case weights. Can be of length 0." + "`wts`: prior case weights. Can be of length 0." wts::W "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V @@ -372,7 +372,7 @@ function StatsBase.fit!(m::AbstractGLM; if haskey(kwargs, :tol) Base.depwarn("`tol` argument is deprecated, use `atol` and `rtol` instead", :fit!) rtol = kwargs[:tol] - end + end _fit!(m, verbose, maxiter, minstepfac, atol, rtol, start) end @@ -438,9 +438,9 @@ const FIT_GLM_DOC = """ # Keyword Arguments - `dofit::Bool=true`: Determines whether model will be fit - - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. - Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. - If a vector is passed (deprecated) it is coerced to FrequencyWeights. + - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. + Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. + If a vector is passed (deprecated) it is coerced to FrequencyWeights. Can be length 0 to indicate no weighting (default). 
- `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`. Can be of length 0 @@ -482,8 +482,8 @@ function fit(::Type{M}, _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else @@ -499,7 +499,7 @@ fit(::Type{M}, y::AbstractVector, d::UnivariateDistribution, l::Link=canonicallink(d); kwargs...) where {M<:AbstractGLM} = - fit(M, float(X), float(y), d, l; kwargs...) + fit(M, float(X), float(y), d, l; kwargs...) """ glm(formula, data, @@ -669,4 +669,3 @@ nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) - diff --git a/src/linpred.jl b/src/linpred.jl index 3cd46761..441f1211 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -1,5 +1,5 @@ """ -linpred!(out, p::LinPred, f::Real=1.0) + linpred!(out, p::LinPred, f::Real=1.0) Overwrite `out` with the linear predictor from `p` with factor `f` @@ -11,14 +11,14 @@ function linpred!(out, p::LinPred, f::Real=1.) end """ -linpred(p::LinPred, f::Real=1.0) + linpred(p::LinPred, f::Real=1.0) Return the linear predictor `p.X * (p.beta0 .+ f * p.delbeta)` """ linpred(p::LinPred, f::Real=1.) = linpred!(Vector{eltype(p.X)}(undef, size(p.X, 1)), p, f) """ -installbeta!(p::LinPred, f::Real=1.0) + installbeta!(p::LinPred, f::Real=1.0) Install `pbeta0 .+= f * p.delbeta` and zero out `p.delbeta`. Return the updated `p.beta0`. 
""" @@ -33,7 +33,7 @@ function installbeta!(p::LinPred, f::Real=1.) end """ -DensePredQR + DensePredQR A `LinPred` type with a dense, unpivoted QR decomposition of `X` @@ -122,16 +122,15 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Re T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), - Matrix{T}(Xw), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - wts, - similar(X, T), - similar(cholfactors(F)), - similar(X, T, (size(X,1),)) - ) + Matrix{T}(Xw), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + wts, + similar(X, T), + similar(cholfactors(F)), + similar(X, T, (size(X,1),))) end cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) @@ -227,14 +226,14 @@ function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T sqrtwts = sqrt.(wts) Xw = isempty(wts) ? SparseMatrixCSC(I, 0, 0) : sqrtwts.*X return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X, - Xw, - X', - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - chol, - similar(X), - wts) + Xw, + X', + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + chol, + similar(X), + wts) end cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1))) @@ -290,7 +289,7 @@ end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) -function vcov(x::LinPredModel) +function vcov(x::LinPredModel) d = dispersion(x, true) B = _covm(x.pp) rmul!(B, dispersion(x, true)) @@ -298,7 +297,7 @@ end _covm(pp::LinPred) = invchol(pp) -function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} wts = pp.wts Z = pp.scratchm1 .= pp.X.*wts XtW2X = Z'Z @@ -306,7 +305,7 @@ function _covm(pp::DensePredChol{T, 
<:ProbabilityWeights, <:Cholesky}) where {T} invXtWX*XtW2X*invXtWX end -function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} +function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} wts = pp.wts Z = pp.scratchm1 .= pp.X.*wts rnk = rank(pp.chol) @@ -334,7 +333,7 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted=false) +function modelmatrix(obj::LinPredModel; weighted=false) if !weighted obj.pp.X elseif !isempty(weights(obj)) diff --git a/src/lm.jl b/src/lm.jl index b49d6d8d..42a419c5 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -67,14 +67,14 @@ end function nullloglikelihood(r::LmResp) n = isempty(r.wts) ? length(r.y) : sum(r.wts) - -n/2 * (log(2π * nulldeviance(r)/n) + 1) + -n/2 * (log(2π * nulldeviance(r)/n) + 1) end -function residuals(r::LmResp; weighted=false) +function residuals(r::LmResp; weighted=false) wts = weights(r) res = r.y - r.mu - if !weighted - res + if !weighted + res elseif !isempty(wts) sqrt.(wts).*res else @@ -118,14 +118,14 @@ const FIT_LM_DOC = """ values of the dependent variable. The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations. - Weights allowed are: + Weights allowed are: - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1) for each observation. - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed. - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups - These weights gives equal point estimates but different standard errors. - If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. + These weights gives equal point estimates but different standard errors. + If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. 
`dropcollinear` controls whether or not `lm` accepts a model matrix which is less-than-full rank. If `true` (the default), only the first of each set of @@ -151,19 +151,18 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " * "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep - end + end # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * + Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * "by coercing wts to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) end - fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) end From 23d67f5d84f9110ab03b9a64de144386c717ddb1 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 23 Jun 2022 15:31:05 +0200 Subject: [PATCH 009/106] WIP --- .vscode/settings.json | 3 ++ src/glmfit.jl | 103 ++++++++++++++++++++++++++---------------- src/linpred.jl | 87 ++++++++++++++++++++--------------- src/lm.jl | 70 ++++++++++++++++------------ test/runtests.jl | 12 +++-- 5 files changed, 166 insertions(+), 109 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..9419f557 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.persistentSession.alwaysCopy": true +} \ No newline at end of file diff --git a/src/glmfit.jl b/src/glmfit.jl index 
94058ef8..59623183 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -92,16 +92,21 @@ the linear predictor, `linPr`. """ function updateμ! end -function updateμ!(r::GlmResp{T}, linPr::T) where T<:FPVector +function updateμ!(r::GlmResp{T,D,L,<:AbstractWeights}, linPr::T) where {T<:FPVector,D,L} isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) updateμ!(r) - if !isempty(r.wts) - map!(*, r.devresid, r.devresid, r.wts) - map!(*, r.wrkwt, r.wrkwt, r.wts) - end + map!(*, r.devresid, r.devresid, r.wts) + map!(*, r.wrkwt, r.wrkwt, r.wts) r end +function updateμ!(r::GlmResp{T,D,L,<:UnitWeights}, linPr::T) where {T<:FPVector,D,L} + isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) + updateμ!(r) + r +end + + function updateμ!(r::GlmResp{V,D,L}) where {V<:FPVector,D,L} y, η, μ, wrkres, wrkwt, dres = r.y, r.eta, r.mu, r.wrkresid, r.wrkwt, r.devresid @@ -248,25 +253,30 @@ end deviance(m::AbstractGLM) = deviance(m.rr) -function loglikelihood(m::AbstractGLM) - r = m.rr - wts = r.wts +loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) + +function loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} y = r.y mu = r.mu d = r.d ll = zero(eltype(mu)) - if length(wts) == length(y) - ϕ = deviance(m)/sum(wts) - @inbounds for i in eachindex(y, mu, wts) - ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) - end - else - ϕ = deviance(m)/length(y) - @inbounds for i in eachindex(y, mu) - ll += loglik_obs(d, y[i], mu[i], 1, ϕ) - end + ϕ = deviance(r)/nobs(r) + @inbounds for i in eachindex(y, mu) + ll += loglik_obs(d, y[i], mu[i], 1, ϕ) + end +end + +function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} + whf = sqrt.(r.wts) + y = r.y + mu = r.mu + d = r.d + ll = zero(eltype(mu)) + ϕ = deviance(r)/nobs(r) + @inbounds for i in eachindex(y, mu, whf) + ll += loglik_obs(d, whf[i]*y[i], whf[i]*mu[i], 1, ϕ) end - ll + ll + sum(log.(weights(r)))/2 end dof(x::GeneralizedLinearModel) = dispersion_parameter(x.rr.d) ? 
length(coef(x)) + 1 : length(coef(x)) @@ -389,6 +399,7 @@ function StatsBase.fit!(m::AbstractGLM, rtol::Real=1e-6, start=nothing, kwargs...) + if haskey(kwargs, :maxIter) Base.depwarn("'maxIter' argument is deprecated, use 'maxiter' instead", :fit!) maxiter = kwargs[:maxIter] @@ -409,10 +420,20 @@ function StatsBase.fit!(m::AbstractGLM, rtol = kwargs[:tol] end - r = m.rr - V = typeof(r.y) - r.y = copy!(r.y, y) - isa(wts, Nothing) || copy!(r.wts, wts) + # r = m.rr + # V = typeof(r.y) + # r.y = copy!(r.y, y) + # if !isa(wts, Nothing) + # if wts isa typeof(r.wts) + # copy!(r.wts, wts) + # else + + # end + # else + # if typeof(r.wts) === UnitWeights + + + isa(offset, Nothing) || copy!(r.offset, offset) initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset) updateμ!(r, r.eta) @@ -608,27 +629,14 @@ function initialeta!(eta::AbstractVector, dist::UnivariateDistribution, link::Link, y::AbstractVector, - wts::AbstractVector, + wts::AbstractWeights, off::AbstractVector) n = length(y) - lw = length(wts) lo = length(off) - if lw == n - @inbounds @simd for i = eachindex(y, eta, wts) - μ = mustart(dist, y[i], wts[i]) - eta[i] = linkfun(link, μ) - end - elseif lw == 0 - @inbounds @simd for i = eachindex(y, eta) - μ = mustart(dist, y[i], 1) - eta[i] = linkfun(link, μ) - end - else - throw(ArgumentError("length of wts must be either $n or 0 but was $lw")) - end + _initialeta!(eta, dist, link, y, wts) if lo == n @inbounds @simd for i = eachindex(eta, off) @@ -641,6 +649,21 @@ function initialeta!(eta::AbstractVector, return eta end +function _initialeta!(eta, dist, link, y, wts::UnitWeights) + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], 1) + eta[i] = linkfun(link, μ) + end +end + +function _initialeta!(eta, dist, link, y, wts::AbstractWeights) + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], wts[i]) + eta[i] = linkfun(link, μ) + end +end + + # Helper function to check that the values of y are in the allowed domain function checky(y, 
d::Distribution) if any(x -> !insupport(d, x), y) @@ -665,7 +688,7 @@ when prior weights of type FrequencyWeights are specified, the sum of weights. nobs(obj::LinPredModel) = nobs(obj.rr) nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) -nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum +nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum -nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) diff --git a/src/linpred.jl b/src/linpred.jl index 441f1211..2f9977f1 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -47,28 +47,35 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` """ mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights{<:Real}} <: DensePred X::Matrix{T} # model matrix - Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::QRCompactWY{T} wts::W wresponse::Vector{T} + function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:UnitWeights} + n, p = size(X) + length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) + length(wts) == n || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) + qrX = qr(X) + new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) + end function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - (length(wts) == n || isempty(wts)) || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) - Xw = wts isa UnitWeights ? 
Matrix{T}(undef, 0, 0) : sqrt.(wts).*X - qrX = wts isa UnitWeights ? qr(X) : qr(Xw) - new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) + length(wts) == n || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) + + qrX = qr(Diagonal(sqrt.(wts))*X) + new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) end + function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W} n, p = size(X) DensePredQR(X, zeros(T, p), wts) end function DensePredQR(X::Matrix{T}) where T n, p = size(X) - DensePredQR{T}(X, zeros(T, p), uweights(0)) + DensePredQR{T}(X, zeros(T, p), uweights(size(X,1))) end end DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1))) @@ -102,11 +109,9 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights. - `scratchm1`: scratch Matrix{T} of the same size as `X` - `scratchm2`: scratch Matrix{T} of the same size as `X'X` -- `scratchv1`: scratch Vector{T} of the same size of `y` """ mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector{<:Real}} <: DensePred X::Matrix{T} # model matrix - Xw::Matrix{T} # weighted model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} @@ -114,23 +119,36 @@ mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector{<:Real}} <: DensePr wts::W scratchm1::Matrix{T} scratchm2::Matrix{T} - scratchv1::Vector{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real}) - Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts).*X - F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw)) + +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) + scr = similar(X) + mul!(scr, Diagonal(wts), X) + F = Hermitian(float(scr'X)) + T = eltype(F) + F = pivot ? 
pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) + DensePredChol(Matrix{T}(X), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + zeros(T, size(X, 2)), + F, + wts, + scr, + similar(cholfactors(F))) +end + +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::UnitWeights) + F = Hermitian(float(X'X)) T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), - Matrix{T}(Xw), zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), F, wts, similar(X, T), - similar(cholfactors(F)), - similar(X, T, (size(X,1),))) + similar(cholfactors(F))) end cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) @@ -151,8 +169,8 @@ function delbeta!(p::DensePredChol{T,<:Cholesky, <:UnitWeights}, r::Vector{T}) w end function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, r::Vector{T}) where T<:BlasReal - p.scratchv1 .= r.*sqrt(p.wts) - ldiv!(p.chol, mul!(p.delbeta, transpose(p.Xw), p.scratchv1)) + X = mul!(p.scratchm1, Diagonal(p.wts), p.X) + ldiv!(p.chol, mul!(p.delbeta, transpose(X),r)) end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal @@ -174,9 +192,8 @@ end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol - Z = p.Xw - p.scratchv1 .= r.*sqrt.(p.wts) - delbeta = mul!(p.delbeta, adjoint(p.Xw), p.scratchv1) + X = mul!(p.scratchm1, Diagonal(p.wts), p.X) + delbeta = mul!(p.delbeta, adjoint(X), r) rnk = rank(ch) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -192,9 +209,9 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Ve end function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal - p.scratchm1 .= wt.*p.X - cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(p.scratchm1), p.X), :U)) - mul!(p.delbeta, transpose(p.scratchm1), r) 
+ scr = mul!(p.scratchm1, Diagonal(wt), p.X) + cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) + mul!(p.delbeta, transpose(scr), r) ldiv!(p.chol, p.delbeta) p end @@ -202,8 +219,7 @@ end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal cf = cholfactors(p.chol) piv = p.chol.p - p.scratchm1 .= wt.*p.X - cf .= mul!(p.scratchm2, adjoint(p.scratchm1), p.X)[piv, piv] + cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv] cholesky!(Hermitian(cf, Symbol(p.chol.uplo))) ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r)) p @@ -211,7 +227,6 @@ end mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred X::M # model matrix - Xw::M # weighted model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment @@ -223,10 +238,7 @@ end function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) - sqrtwts = sqrt.(wts) - Xw = isempty(wts) ? 
SparseMatrixCSC(I, 0, 0) : sqrtwts.*X return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X, - Xw, X', zeros(T, size(X, 2)), zeros(T, size(X, 2)), @@ -263,7 +275,6 @@ end function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C} scr = p.scratch .= p.X.*p.wts XtWX = p.Xt*scr - @show XtWX c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end @@ -292,14 +303,14 @@ invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), siz function vcov(x::LinPredModel) d = dispersion(x, true) B = _covm(x.pp) - rmul!(B, dispersion(x, true)) + rmul!(B, d) end -_covm(pp::LinPred) = invchol(pp) +@inline _covm(pp::LinPred) = invchol(pp) function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} wts = pp.wts - Z = pp.scratchm1 .= pp.X.*wts + Z = mul!(pp.scratchm1, Diagonal(wts), pp.X) XtW2X = Z'Z invXtWX = invchol(pp) invXtWX*XtW2X*invXtWX @@ -307,7 +318,7 @@ end function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) where {T} wts = pp.wts - Z = pp.scratchm1 .= pp.X.*wts + mul!(pp.scratchm1, Diagonal(wts), pp.X) rnk = rank(pp.chol) p = length(pp.delbeta) if rnk == p @@ -336,8 +347,8 @@ modelframe(obj::LinPredModel) = obj.fr function modelmatrix(obj::LinPredModel; weighted=false) if !weighted obj.pp.X - elseif !isempty(weights(obj)) - obj.pp.Xw + elseif isweighted(obj) + mul!(obj.pp.scratchm1, Diagonal(sqrt.(obj.pp.wts)), obj.pp.X) else throw(ArgumentError("`weighted=true` allowed only for weighted models.")) end @@ -349,8 +360,12 @@ fitted(m::LinPredModel) = m.rr.mu predict(mm::LinPredModel) = fitted(mm) StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula residuals(obj::LinPredModel; kwarg...) = residuals(obj.rr; kwarg...) 
+ +weights(obj::RegressionModel) = weights(obj.model) weights(obj::LinPredModel) = weights(obj.rr) +isweighted(obj::RegressionModel) = isweighted(obj.model) +isweighted(obj::LinPredModel) = weights(obj) isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights} coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) diff --git a/src/lm.jl b/src/lm.jl index 42a419c5..5404274d 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -23,7 +23,7 @@ mutable struct LmResp{V<:FPVector, W<:AbstractWeights{<:Real}} <: ModResp # res ll = length(off) ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0") ll = length(wts) - ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0") + ll == n || error("length of wts is $ll, must be $n") new{V,W}(mu, off, wts, y) end end @@ -43,48 +43,63 @@ end updateμ!(r::LmResp{V, W}, linPr) where {V<:FPVector, W} = updateμ!(r, convert(V, vec(linPr))) -function deviance(r::LmResp) +function deviance(r::LmResp{T,<:UnitWeights}) where T + y = r.y + mu = r.mu + v = zero(eltype(y)) + zero(eltype(y)) + @inbounds @simd for i in eachindex(y,mu) + v += abs2(y[i] - mu[i]) + end + return v +end + +function deviance(r::LmResp{T,<:AbstractWeights}) where T y = r.y mu = r.mu wts = r.wts v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) - if isempty(wts) - @inbounds @simd for i = eachindex(y,mu) - v += abs2(y[i] - mu[i]) - end - else - @inbounds @simd for i = eachindex(y,mu,wts) - v += abs2(y[i] - mu[i])*wts[i] - end + @inbounds @simd for i in eachindex(y,mu,wts) + v += abs2(y[i] - mu[i])*wts[i] end - v + return v end -function loglikelihood(r::LmResp) - n = isempty(r.wts) ? length(r.y) : sum(r.wts) +weights(r::LmResp) = r.wts + +function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T + n = nobs(r) -n/2 * (log(2π * deviance(r)/n) + 1) end -function nullloglikelihood(r::LmResp) - n = isempty(r.wts) ? 
length(r.y) : sum(r.wts) +function loglikelihood(r::LmResp{T,<:AbstractWeights}) where T + N = nobs(r) + n = sum(log.(weights(r))) + 0.5*(n - N * (log(2π * deviance(r)/N) + 1)) +end + +function nullloglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T + n = nobs(r) -n/2 * (log(2π * nulldeviance(r)/n) + 1) end +function nullloglikelihood(r::LmResp{T,<:AbstractWeights}) where T + N = nobs(r) + n = sum(log.(weights(r))) + 0.5*(n - N * (log(2π * nulldeviance(r)/N) + 1)) +end + function residuals(r::LmResp; weighted=false) wts = weights(r) res = r.y - r.mu if !weighted res - elseif !isempty(wts) + elseif isweighted(r) sqrt.(wts).*res else throw(ArgumentError("`weighted=true` allowed only for weighted models.")) end end -weights(r::LmResp) = r.wts - - """ LinearModel @@ -198,13 +213,12 @@ For linear models, the deviance of the null model is equal to the total sum of s """ function nulldeviance(obj::LinearModel) y = obj.rr.y - wts = obj.rr.wts - + wts = weights(obj) if hasintercept(obj) - if isempty(wts) + if !isweighted(obj) m = mean(y) else - m = mean(y, weights(wts)) + m = mean(y, wts) end else @warn("Starting from GLM.jl 1.8, null model is defined as having no predictor at all " * @@ -213,7 +227,7 @@ function nulldeviance(obj::LinearModel) end v = zero(eltype(y))*zero(eltype(wts)) - if isempty(wts) + if !isweighted(obj) @inbounds @simd for yi in y v += abs2(yi - m) end @@ -229,7 +243,7 @@ loglikelihood(obj::LinearModel) = loglikelihood(obj.rr) function nullloglikelihood(obj::LinearModel) r = obj.rr - n = isempty(r.wts) ? length(r.y) : sum(r.wts) + n = nobs(r) -n/2 * (log(2π * nulldeviance(obj)/n) + 1) end @@ -323,12 +337,12 @@ Currently only implemented for linear models without weights. 
""" function StatsBase.cooksdistance(obj::LinearModel) wts = weights(obj) - u = residuals(obj; weighted=!isempty(wts)) + u = residuals(obj; weighted=isweighted(obj)) mse = GLM.dispersion(obj,true) k = dof(obj)-1 d_res = dof_residual(obj) - X = modelmatrix(obj; weighted=!isempty(wts)) - XtX = crossmodelmatrix(obj; weighted=!isempty(wts)) + X = modelmatrix(obj; weighted=isweighted(obj)) + XtX = crossmodelmatrix(obj; weighted=isweighted(obj)) k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) hii = diag(X * inv(XtX) * X') D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) diff --git a/test/runtests.jl b/test/runtests.jl index b85ee5c8..c7e5cf43 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -121,9 +121,11 @@ end @test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; -0.20434035947652907 0.00020126512195323495]) @test isapprox(first(predict(lm_model)), 357.57694841780994) - @test isapprox(loglikelihood(lm_model), -4353.946729075838) - @test isapprox(loglikelihood(glm_model), -4353.946729075838) - @test isapprox(nullloglikelihood(lm_model), -4984.892139711452) + @test isapprox(loglikelihood(lm_model), -1467.8964643217373) + @test isapprox(loglikelihood(glm_model), -1467.8964643217373) + @test isapprox(nullloglikelihood(lm_model), -1790.7176571556527) + #@test isapprox(nullloglikelihood(glm_model), -1790.7176571556527) + @test isapprox(nullloglikelihood(lm_model), -1790.7176571556527) @test isapprox(mean(residuals(lm_model)), -5.412966629787718) end @@ -170,10 +172,10 @@ end df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3]) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. 
Proceeding " * - "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=wts) + "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * - "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=wts) + "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=df.wts) end @testset "saturated linear model" begin From f4d90a9d2966d5e09cef3713638e2e4bd1e24c6d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 4 Jul 2022 19:58:33 +0200 Subject: [PATCH 010/106] Fix loglik for weighted models --- src/glmfit.jl | 24 ++++++++++++++++++++---- src/glmtools.jl | 14 ++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 59623183..04235c7c 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -266,19 +266,35 @@ function loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} end end -function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} - whf = sqrt.(r.wts) +function loglikelihood(r::GlmResp{T,D,L,<:FrequencyWeights}) where {T,D,L} + wts = r.wts y = r.y mu = r.mu d = r.d ll = zero(eltype(mu)) ϕ = deviance(r)/nobs(r) @inbounds for i in eachindex(y, mu, whf) - ll += loglik_obs(d, whf[i]*y[i], whf[i]*mu[i], 1, ϕ) + ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) + end + ll +end + +function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} + wts = r.wts + sumwt = sum(wts) + y = r.y + mu = r.mu + d = r.d + ll = zero(eltype(mu)) + ϕ = deviance(r) + n = length(y) + @inbounds for i in eachindex(y, mu, wts) + ll += loglik_aweights_obs(d, y[i], mu[i], wts[i], ϕ, sumwt, n) end - ll + sum(log.(weights(r)))/2 + ll end + dof(x::GeneralizedLinearModel) = dispersion_parameter(x.rr.d) ? 
length(coef(x)) + 1 : length(coef(x)) function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real, diff --git a/src/glmtools.jl b/src/glmtools.jl index b6ec0008..a76836c0 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -528,3 +528,17 @@ loglik_obs(::Poisson, y, μ, wt, ϕ) = wt*logpdf(Poisson(μ), y) # Γ(θ+y) / (y! * Γ(θ)) * p^θ(1-p)^y # Hence, p = θ/(μ+θ) loglik_obs(d::NegativeBinomial, y, μ, wt, ϕ) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) + + +## Slight different interface for analytic and probability weights +## ϕ: is the deviance - not the deviance / n or sum(wt) +## sumwt: sum(wt) +## n is the numer of observations +loglik_aweights_obs(::Bernoulli, y, μ, wt, ϕ, sumwt, n) = logpdf(Bernoulli(μ), round(wt*y)) +loglik_aweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(wt), μ), round(wt*y)) +loglik_aweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Gamma(inv(ϕ/sumwt), μ*ϕ/sumwt), y) +loglik_aweights_obs(::Geometric, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Geometric(1 / (μ + 1)), y) +loglik_aweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) = -(wt*(1 + log(2π*(ϕ/sumwt))) + 3*log(y)*wt)/2 +loglik_aweights_obs(::Normal, y, μ, wt, ϕ, sumwt, n) = ((-log(2π*ϕ/n) - 1) + log(wt))/2 +loglik_aweights_obs(::Poisson, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Poisson(μ), y) +loglik_aweights_obs(d::NegativeBinomial, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) From 6b7d95c1a1711979f92ead67998b243bab38d0b5 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 15 Jul 2022 10:23:19 +0200 Subject: [PATCH 011/106] Fix remaining issues --- src/glmfit.jl | 22 ++++------------------ src/glmtools.jl | 4 ++-- src/linpred.jl | 10 +++++----- src/lm.jl | 2 +- test/runtests.jl | 2 +- 5 files changed, 13 insertions(+), 27 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 04235c7c..0d7fe300 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -264,6 +264,7 @@ function 
loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} @inbounds for i in eachindex(y, mu) ll += loglik_obs(d, y[i], mu[i], 1, ϕ) end + return ll end function loglikelihood(r::GlmResp{T,D,L,<:FrequencyWeights}) where {T,D,L} @@ -273,10 +274,10 @@ function loglikelihood(r::GlmResp{T,D,L,<:FrequencyWeights}) where {T,D,L} d = r.d ll = zero(eltype(mu)) ϕ = deviance(r)/nobs(r) - @inbounds for i in eachindex(y, mu, whf) + @inbounds for i in eachindex(y, mu, wts) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end - ll + return ll end function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} @@ -291,10 +292,9 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} @inbounds for i in eachindex(y, mu, wts) ll += loglik_aweights_obs(d, y[i], mu[i], wts[i], ϕ, sumwt, n) end - ll + return ll end - dof(x::GeneralizedLinearModel) = dispersion_parameter(x.rr.d) ? length(coef(x)) + 1 : length(coef(x)) function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real, @@ -436,20 +436,6 @@ function StatsBase.fit!(m::AbstractGLM, rtol = kwargs[:tol] end - # r = m.rr - # V = typeof(r.y) - # r.y = copy!(r.y, y) - # if !isa(wts, Nothing) - # if wts isa typeof(r.wts) - # copy!(r.wts, wts) - # else - - # end - # else - # if typeof(r.wts) === UnitWeights - - - isa(offset, Nothing) || copy!(r.offset, offset) initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset) updateμ!(r, r.eta) diff --git a/src/glmtools.jl b/src/glmtools.jl index a76836c0..3390bde6 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -534,8 +534,8 @@ loglik_obs(d::NegativeBinomial, y, μ, wt, ϕ) = wt*logpdf(NegativeBinomial(d.r, ## ϕ: is the deviance - not the deviance / n or sum(wt) ## sumwt: sum(wt) ## n is the numer of observations -loglik_aweights_obs(::Bernoulli, y, μ, wt, ϕ, sumwt, n) = logpdf(Bernoulli(μ), round(wt*y)) -loglik_aweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(wt), μ), round(wt*y)) +loglik_aweights_obs(::Bernoulli, y, μ, wt, 
ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) +loglik_aweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) loglik_aweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Gamma(inv(ϕ/sumwt), μ*ϕ/sumwt), y) loglik_aweights_obs(::Geometric, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Geometric(1 / (μ + 1)), y) loglik_aweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) = -(wt*(1 + log(2π*(ϕ/sumwt))) + 3*log(y)*wt)/2 diff --git a/src/linpred.jl b/src/linpred.jl index 2f9977f1..1e745bbc 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -232,7 +232,7 @@ mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} chol::C - scratch::M + scratchm1::M wts::W end @@ -252,28 +252,28 @@ cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(siz cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts) function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} - scr = mul!(p.scratch, Diagonal(wt), p.X) + scr = mul!(p.scratchm1, Diagonal(wt), p.X) XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} - scr = mul!(p.scratch, Diagonal(wt.*p.wts), p.X) + scr = mul!(p.scratchm1, Diagonal(wt.*p.wts), p.X) XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C} - scr = p.scratch = p.X + scr = p.X XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, 
r::Vector{T}) where {T,M,C} - scr = p.scratch .= p.X.*p.wts + scr = p.scratchm1 .= p.X.*p.wts XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) diff --git a/src/lm.jl b/src/lm.jl index 5404274d..d3f0d3ed 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -301,7 +301,7 @@ function predict(mm::LinearModel, newx::AbstractMatrix; "when some independent variables have been dropped " * "from the model due to collinearity")) end - length(mm.rr.wts) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression") + !isweighted(mm) || error("prediction with confidence intervals not yet implemented for weighted regression") chol = cholesky!(mm.pp) # get the R matrix from the QR factorization if chol isa CholeskyPivoted diff --git a/test/runtests.jl b/test/runtests.jl index c7e5cf43..58976a55 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -175,7 +175,7 @@ end "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. 
Proceeding " * - "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=df.wts) + "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), df, Normal(), IdentityLink(); wts=df.wts) end @testset "saturated linear model" begin From c236b82f09bb2224bc5ded751847128a4dd7ad88 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 15 Jul 2022 17:41:16 +0200 Subject: [PATCH 012/106] Final commit --- src/glmfit.jl | 47 +++++++++++++++++++++++++++++++++++++++++++++ src/linpred.jl | 24 ++++++++++++++--------- src/lm.jl | 2 +- src/momentmatrix.jl | 27 ++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 10 deletions(-) create mode 100644 src/momentmatrix.jl diff --git a/src/glmfit.jl b/src/glmfit.jl index 0d7fe300..48604f6d 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -694,3 +694,50 @@ nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) + +## To be reviewed! +Base.sqrt(::UnitWeights{T}) where T = one(T) + +function residuals(r::GlmResp; weighted::Bool = false) + ## Note: this is necessary if we want to be able to evsaluate + ## unweighted residuls when the model is weighted. 
Otherwise, if we + ## agree that the residulas should follow the specification of the model + ## we could use the object devresid + y, η, μ = r.y, r.eta, r.mu + dres = similar(μ) + + @inbounds for i in eachindex(y, μ) + μi = μ[i] + yi = y[i] + dres[i] = sqrt(max(0, devresid(r.d, yi, μi)))*sign(yi-μi) + end + + if weighted + dres .= dres.*sqrt.(r.wts) + end + + return dres +end + +mdisp(rr::GlmResp{T1, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}, T2, T3}) where {T1, T2, T3} = one(1) +mdisp(rr::GlmResp{T1, <: Union{Gamma, Geometric, InverseGaussian}, T2, T3}) where {T1, T2, T3} = sum(abs2, rr.wrkwt.*rr.wrkresid)/sum(rr.wrkwt) + +momentmatrix(m::RegressionModel) = momentmatrix(m.model) + +function momentmatrix(m::GeneralizedLinearModel) + X = modelmatrix(m; weighted=false) + d = mdisp(m.rr) + r = m.rr.wrkwt.*m.rr.wrkresid + return (X.*r)./d +end + +function momentmatrix(m::LinearModel) + X = modelmatrix(m; weighted=false) + r = residuals(m; weighted=false) + mm = (X.*r) + if isweighted(m) + mm.*weights(m) + else + mm + end +end diff --git a/src/linpred.jl b/src/linpred.jl index 1e745bbc..3ed17c28 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -302,23 +302,26 @@ invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), siz function vcov(x::LinPredModel) d = dispersion(x, true) - B = _covm(x.pp) - rmul!(B, d) + u = residuals(x; weighted = isweighted(x)) + _vcov(x.pp, u, d) end -@inline _covm(pp::LinPred) = invchol(pp) +_vcov(pp::LinPred, u, d) = rmul!(invchol(pp), d) -function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}) where {T} +function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}, u, d) where {T} wts = pp.wts - Z = mul!(pp.scratchm1, Diagonal(wts), pp.X) + Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) XtW2X = Z'Z invXtWX = invchol(pp) - invXtWX*XtW2X*invXtWX + V = invXtWX*XtW2X*invXtWX + n = length(wts) + k = length(pp.delbeta) + n/(n-k)*V end -function _covm(pp::DensePredChol{T, 
<:ProbabilityWeights, <:CholeskyPivoted}) where {T} +function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}, u) where {T} wts = pp.wts - mul!(pp.scratchm1, Diagonal(wts), pp.X) + Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) rnk = rank(pp.chol) p = length(pp.delbeta) if rnk == p @@ -327,7 +330,10 @@ function _covm(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}) wh ## no idea end invXtWX = invchol(pp) - invXtWX*XtW2X*invXtWX + V = invXtWX*XtW2X*invXtWX + n = length(wts) + k = length(pp.delbeta) + n/(n-k)*V end function cor(x::LinPredModel) diff --git a/src/lm.jl b/src/lm.jl index d3f0d3ed..e0473ed4 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -93,7 +93,7 @@ function residuals(r::LmResp; weighted=false) res = r.y - r.mu if !weighted res - elseif isweighted(r) + elseif r.wts isa AbstractWeights sqrt.(wts).*res else throw(ArgumentError("`weighted=true` allowed only for weighted models.")) diff --git a/src/momentmatrix.jl b/src/momentmatrix.jl new file mode 100644 index 00000000..2dac6f78 --- /dev/null +++ b/src/momentmatrix.jl @@ -0,0 +1,27 @@ +## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged + +mdisp(x::LmResp) = one() +mdisp(rr::GlmResp{T1, <: Normal, T2, T3}) where {T1, T2, T3} = one() + +function mdisp(rr::GlmResp{T1, <: Union{Gamma, Bernoulli, InverseGaussian}, T2, T3}) where {T1, T2, T3} + sum(abs2, rr.wrkwt.*rr.wrkresid)/sum(rr.wrkwt) +end + + +function momentmatrix(model::RegressionModel; weighted::Bool=false) + X = modelmatrix(model; weighted=weightd) + d = mdisp(model.model.rr) + r = residuals(model; weighted=weightd) + return (X.*r)./d +end + +# function momentmatrix(model::RegressionModel; weighted::Bool=false) +# X = modelmatrix(model; weighted=false) +# mm = similar(X) +# if weighted +# r = residual() +# d = dispersion(model) +# r = residuals(model; weighted=false) +# return (X.*r)./d +# end + From 8bdfb553deac0da3e574d8f6565781c6d57f5e42 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa 
Date: Fri, 15 Jul 2022 18:13:48 +0200 Subject: [PATCH 013/106] Fix merge --- src/glmfit.jl | 51 --------------------------------------------------- 1 file changed, 51 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index f820b427..16188c85 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -260,7 +260,6 @@ end deviance(m::AbstractGLM) = deviance(m.rr) -<<<<<<< HEAD loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) function loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} @@ -276,64 +275,14 @@ function loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} end function loglikelihood(r::GlmResp{T,D,L,<:FrequencyWeights}) where {T,D,L} -======= -function nulldeviance(m::GeneralizedLinearModel) - r = m.rr - wts = weights(r.wts) - y = r.y - d = r.d - offset = r.offset - hasint = hasintercept(m) - dev = zero(eltype(y)) - if isempty(offset) # Faster method - if !isempty(wts) - mu = hasint ? - mean(y, wts) : - linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1) - @inbounds for i in eachindex(y, wts) - dev += wts[i] * devresid(d, y[i], mu) - end - else - mu = hasint ? mean(y) : linkinv(r.link, zero(eltype(y))/1) - @inbounds for i in eachindex(y) - dev += devresid(d, y[i], mu) - end - end - else - X = fill(1.0, length(y), hasint ? 
1 : 0) - nullm = fit(GeneralizedLinearModel, - X, y, d, r.link, wts=wts, offset=offset, - maxiter=m.maxiter, minstepfac=m.minstepfac, - atol=m.atol, rtol=m.rtol) - dev = deviance(nullm) - end - return dev -end - -function loglikelihood(m::AbstractGLM) - r = m.rr ->>>>>>> 97ef55810a95f2b4122cfd1e1904c5b3c20182cb wts = r.wts y = r.y mu = r.mu d = r.d ll = zero(eltype(mu)) -<<<<<<< HEAD ϕ = deviance(r)/nobs(r) @inbounds for i in eachindex(y, mu, wts) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) -======= - if !isempty(wts) - ϕ = deviance(m)/sum(wts) - @inbounds for i in eachindex(y, mu, wts) - ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) - end - else - ϕ = deviance(m)/length(y) - @inbounds for i in eachindex(y, mu) - ll += loglik_obs(d, y[i], mu[i], 1, ϕ) - end ->>>>>>> 97ef55810a95f2b4122cfd1e1904c5b3c20182cb end return ll end From 3eb2ca43735cdfe450ede65b99353af556d86564 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 16 Jul 2022 09:55:29 +0200 Subject: [PATCH 014/106] Fix nulldeviance --- src/glmfit.jl | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/glmfit.jl b/src/glmfit.jl index 16188c85..5ae52467 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -68,6 +68,8 @@ function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Re end deviance(r::GlmResp) = sum(r.devresid) + + weights(r::GlmResp) = r.wts """ cancancel(r::GlmResp{V,D,L}) @@ -260,6 +262,39 @@ end deviance(m::AbstractGLM) = deviance(m.rr) +function nulldeviance(m::GeneralizedLinearModel) + r = m.rr + wts = r.wts + y = r.y + d = r.d + offset = r.offset + hasint = hasintercept(m) + dev = zero(eltype(y)) + if isempty(offset) # Faster method + if !(wts isa UnitWeights) + mu = hasint ? + mean(y, wts) : + linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1) + @inbounds for i in eachindex(y, wts) + dev += wts[i] * devresid(d, y[i], mu) + end + else + mu = hasint ? 
mean(y) : linkinv(r.link, zero(eltype(y))/1) + @inbounds for i in eachindex(y) + dev += devresid(d, y[i], mu) + end + end + else + X = fill(1.0, length(y), hasint ? 1 : 0) + nullm = fit(GeneralizedLinearModel, + X, y, d, r.link, wts=wts, offset=offset, + maxiter=m.maxiter, minstepfac=m.minstepfac, + atol=m.atol, rtol=m.rtol) + dev = deviance(nullm) + end + return dev +end + loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) function loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} From 63c8358a65bec002023c1c4f0a33176f7904ac1e Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 16 Jul 2022 10:14:31 +0200 Subject: [PATCH 015/106] Bypass crossmodelmatrix drom StatsAPI --- src/lm.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/lm.jl b/src/lm.jl index e0473ed4..bd02d0fa 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -335,6 +335,13 @@ for each observation in linear model `obj`, giving an estimate of the influence of each data point. Currently only implemented for linear models without weights. """ +## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged +function crossmodelmatrix(model::RegressionModel; weighted::Bool=false) + x = weighted ? 
modelmatrix(model; weighted=weighted) : modelmatrix(model) + return Symmetric(x' * x) +end + + function StatsBase.cooksdistance(obj::LinearModel) wts = weights(obj) u = residuals(obj; weighted=isweighted(obj)) From e93a9191295cc99adfdbf8d16c38c451498f2f74 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 16 Jul 2022 10:42:53 +0200 Subject: [PATCH 016/106] Delete momentmatrix.jl --- src/momentmatrix.jl | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 src/momentmatrix.jl diff --git a/src/momentmatrix.jl b/src/momentmatrix.jl deleted file mode 100644 index 2dac6f78..00000000 --- a/src/momentmatrix.jl +++ /dev/null @@ -1,27 +0,0 @@ -## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged - -mdisp(x::LmResp) = one() -mdisp(rr::GlmResp{T1, <: Normal, T2, T3}) where {T1, T2, T3} = one() - -function mdisp(rr::GlmResp{T1, <: Union{Gamma, Bernoulli, InverseGaussian}, T2, T3}) where {T1, T2, T3} - sum(abs2, rr.wrkwt.*rr.wrkresid)/sum(rr.wrkwt) -end - - -function momentmatrix(model::RegressionModel; weighted::Bool=false) - X = modelmatrix(model; weighted=weightd) - d = mdisp(model.model.rr) - r = residuals(model; weighted=weightd) - return (X.*r)./d -end - -# function momentmatrix(model::RegressionModel; weighted::Bool=false) -# X = modelmatrix(model; weighted=false) -# mm = similar(X) -# if weighted -# r = residual() -# d = dispersion(model) -# r = residuals(model; weighted=false) -# return (X.*r)./d -# end - From 7bb0959d2b2f437c8010eb0fe930e7452a7e5e6c Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 16 Jul 2022 10:43:07 +0200 Subject: [PATCH 017/106] Delete scratch.jl --- src/scratch.jl | 77 -------------------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 src/scratch.jl diff --git a/src/scratch.jl b/src/scratch.jl deleted file mode 100644 index 4f2612b0..00000000 --- a/src/scratch.jl +++ /dev/null @@ -1,77 +0,0 @@ -using GLM -using DataFrames -using Random -using 
CSV -using StatsBase -using RDatasets -Random.seed!(11) - -y = rand(10) -x = rand(10,2) -wts = rand(10) -df = DataFrame(x, :auto) -df.y = y -df.wts = wts -lm1 = lm(x,y) -lmw = lm(x,y; wts = wts) -lmf = lm(@formula(y~x1+x2-1), df) -lmfw = lm(@formula(y~-1+x1+x2), df; wts = aweights(wts)) -lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) -lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) - -glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) - -cooksdistance(lm1) - - - -df = dataset("quantreg", "engel") -N = nrow(df) -df.weights = repeat(1:5, Int(N/5)) -f = @formula(FoodExp ~ Income) -lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) -glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) -@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) -@test isapprox(r2(lm_model), 0.8330258148644486) -@test isapprox(adjr2(lm_model), 0.832788298242634) -@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; - -0.06772589439264813 6.670664781664879e-5]) -@test isapprox(first(predict(lm_model)), 357.57694841780994) -@test isapprox(loglikelihood(lm_model), -4353.946729075838) -@test isapprox(loglikelihood(glm_model), -4353.946729075838) -@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) -@test isapprox(mean(residuals(lm_model)), -5.412966629787718) - -lm_model = lm(f, df, wts = df.weights) -glm_model = glm(f, df, Normal(), wts = df.weights) -@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) -@test isapprox(r2(lm_model), 0.8330258148644486) -@test isapprox(adjr2(lm_model), 0.832788298242634) -@test isapprox(vcov(lm_model), [88.02760245551447 
-0.06772589439264813; - -0.06772589439264813 6.670664781664879e-5]) -@test isapprox(first(predict(lm_model)), 357.57694841780994) -@test isapprox(loglikelihood(lm_model), -4353.946729075838) -@test isapprox(loglikelihood(glm_model), -4353.946729075838) -@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) -@test isapprox(mean(residuals(lm_model)), -5.412966629787718) - - - -lm_model = lm(f, df, wts = aweights(df.weights)) -glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) -@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) -@test isapprox(r2(lm_model), 0.8330258148644486) -@test isapprox(adjr2(lm_model), 0.8323091874604334) -@test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; - -0.20434035947652907 0.00020126512195323495]) -@test isapprox(first(predict(lm_model)), 357.57694841780994) -@test isapprox(loglikelihood(lm_model), -4353.946729075838) -@test isapprox(loglikelihood(glm_model), -4353.946729075838) -@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) -@test isapprox(mean(residuals(lm_model)), -5.412966629787718) \ No newline at end of file From ded17a86684c13e541d0de7d422b03351fd4f539 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 16 Jul 2022 10:43:38 +0200 Subject: [PATCH 018/106] Delete settings.json --- .vscode/settings.json | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 9419f557..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "julia.persistentSession.alwaysCopy": true -} \ No newline at end of file From 3346774549d44f32678eee07a9505a92a77e8ce6 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 5 Sep 2022 13:44:44 +0200 Subject: [PATCH 019/106] AbstractWeights are 
required to be real Co-authored-by: Milan Bouchet-Valat --- src/glmfit.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 5ae52467..74bea0f1 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -3,7 +3,7 @@ The response vector and various derived vectors in a generalized linear model. """ -struct GlmResp{V<:FPVector, D<:UnivariateDistribution,L<:Link,W<:AbstractWeights{<:Real}} <: ModResp +struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link,W<:AbstractWeights} <: ModResp "`y`: response vector" y::V d::D From 7376e787b4961291dbc49a48444dcff83fac71e5 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 5 Sep 2022 15:58:21 +0200 Subject: [PATCH 020/106] Update src/glmfit.jl Co-authored-by: Milan Bouchet-Valat --- src/glmfit.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 74bea0f1..9a1cadea 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -63,9 +63,9 @@ function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::Abst return r end -function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) where {D, L} +GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, + wts::AbstractWeights) where {D, L} = GlmResp(float(y), d, l, float(off), wts) -end deviance(r::GlmResp) = sum(r.devresid) From a738268bcf8385834435be156c55b1469c108e04 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 5 Sep 2022 16:21:20 +0200 Subject: [PATCH 021/106] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/glmfit.jl | 38 ++++++++++++++++---------------------- src/glmtools.jl | 6 +++--- src/linpred.jl | 35 +++++++++++++++-------------------- src/lm.jl | 33 +++++++++++++++++---------------- test/runtests.jl | 1 - 5 files changed, 51 insertions(+), 62 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 9a1cadea..4190accf 100644 --- 
a/src/glmfit.jl +++ b/src/glmfit.jl @@ -51,7 +51,7 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights{<:Real}) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) @@ -69,8 +69,8 @@ GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, deviance(r::GlmResp) = sum(r.devresid) - weights(r::GlmResp) = r.wts + """ cancancel(r::GlmResp{V,D,L}) @@ -108,7 +108,6 @@ function updateμ!(r::GlmResp{T,D,L,<:UnitWeights}, linPr::T) where {T<:FPVector r end - function updateμ!(r::GlmResp{V,D,L}) where {V<:FPVector,D,L} y, η, μ, wrkres, wrkwt, dres = r.y, r.eta, r.mu, r.wrkresid, r.wrkwt, r.devresid @@ -545,10 +544,10 @@ const FIT_GLM_DOC = """ # Keyword Arguments - `dofit::Bool=true`: Determines whether model will be fit - - `wts::AbstractWeights=aweights(similar(y,0))`: Weights of observations. + - `wts::AbstractWeights=uweights(length(y))`: Weights of observations. Allowed weights are `AnalyticalWeights`, `FrequencyWeights`, or `ProbabilityWeights`. - If a vector is passed (deprecated) it is coerced to FrequencyWeights. - Can be length 0 to indicate no weighting (default). + If a no-`AbstractWeights` vector is passed (deprecated) it is coerced to `FrequencyWeights`. + By default, `UnitWeights` are used, meaning that no weighting is applied. - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`. 
Can be of length 0 - `verbose::Bool=false`: Display convergence information for each iteration @@ -590,8 +589,8 @@ function fit(::Type{M}, wts elseif isa(wts, AbstractVector) Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * - "by coercing wts to `FrequencyWeights`", :fit) + "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) else throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) @@ -648,7 +647,6 @@ function dispersion(m::AbstractGLM, sqr::Bool=false) end end - """ predict(mm::AbstractGLM, newX::AbstractMatrix; offset::FPVector=eltype(newX)[], interval::Union{Symbol,Nothing}=nothing, level::Real = 0.95, @@ -782,11 +780,7 @@ nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), leng ## To be reviewed! Base.sqrt(::UnitWeights{T}) where T = one(T) -function residuals(r::GlmResp; weighted::Bool = false) - ## Note: this is necessary if we want to be able to evsaluate - ## unweighted residuls when the model is weighted. 
Otherwise, if we - ## agree that the residulas should follow the specification of the model - ## we could use the object devresid +function residuals(r::GlmResp; weighted::Bool=false) y, η, μ = r.y, r.eta, r.mu dres = similar(μ) @@ -797,31 +791,31 @@ function residuals(r::GlmResp; weighted::Bool = false) end if weighted - dres .= dres.*sqrt.(r.wts) + dres .*= sqrt.(r.wts) end return dres end -mdisp(rr::GlmResp{T1, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}, T2, T3}) where {T1, T2, T3} = one(1) -mdisp(rr::GlmResp{T1, <: Union{Gamma, Geometric, InverseGaussian}, T2, T3}) where {T1, T2, T3} = sum(abs2, rr.wrkwt.*rr.wrkresid)/sum(rr.wrkwt) +mdisp(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}) = 1 +mdisp(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}) = + sum(abs2, Base.Broadcast.broadcasted(*, rr.wrkwt, rr.wrkresid))/sum(rr.wrkwt) momentmatrix(m::RegressionModel) = momentmatrix(m.model) function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) d = mdisp(m.rr) - r = m.rr.wrkwt.*m.rr.wrkresid - return (X.*r)./d + r = m.rr.wrkwt .* m.rr.wrkresid + return (X .* r) ./ d end function momentmatrix(m::LinearModel) X = modelmatrix(m; weighted=false) r = residuals(m; weighted=false) - mm = (X.*r) if isweighted(m) - mm.*weights(m) + return X .* r .* weights(m) else - mm + return X .* r end end diff --git a/src/glmtools.jl b/src/glmtools.jl index 3390bde6..d25a02d0 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -531,9 +531,9 @@ loglik_obs(d::NegativeBinomial, y, μ, wt, ϕ) = wt*logpdf(NegativeBinomial(d.r, ## Slight different interface for analytic and probability weights -## ϕ: is the deviance - not the deviance / n or sum(wt) -## sumwt: sum(wt) -## n is the numer of observations +## ϕ is the deviance - not the deviance/n nor sum(wt) +## sumwt is sum(wt) +## n is the number of observations loglik_aweights_obs(::Bernoulli, y, μ, wt, ϕ, sumwt, n) = 
logpdf(Binomial(round(Int, wt), μ), round(wt*y)) loglik_aweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) loglik_aweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Gamma(inv(ϕ/sumwt), μ*ϕ/sumwt), y) diff --git a/src/linpred.jl b/src/linpred.jl index 3ed17c28..c9270012 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -45,20 +45,19 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `qr`: a `QRCompactWY` object created from `X`, with optional row weights. """ -mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights{<:Real}} <: DensePred +mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::QRCompactWY{T} wts::W - wresponse::Vector{T} function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:UnitWeights} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - length(wts) == n || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) + length(wts) == n || throw(DimensionMismatch("Length of weights does not match the dimension of X")) qrX = qr(X) - new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) + new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, size(X,1))) end function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} n, p = size(X) @@ -69,14 +68,10 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights{<:Real}} <: DensePred new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) end - function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W} + function DensePredQR{T}(X::Matrix{T}, wts::W=uweights(size(X,1))) where {T,W} n, p = size(X) DensePredQR(X, zeros(T, 
p), wts) end - function DensePredQR(X::Matrix{T}) where T - n, p = size(X) - DensePredQR{T}(X, zeros(T, p), uweights(size(X,1))) - end end DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1))) DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts) @@ -84,7 +79,7 @@ DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X) """ -delbeta!(p::LinPred, r::Vector) + delbeta!(p::LinPred, r::Vector) Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r` """ @@ -96,7 +91,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal end """ -DensePredChol{T} + DensePredChol{T} A `LinPred` type with a dense Cholesky factorization of `X'X` @@ -110,7 +105,7 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchm1`: scratch Matrix{T} of the same size as `X` - `scratchm2`: scratch Matrix{T} of the same size as `X'X` """ -mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector{<:Real}} <: DensePred +mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment @@ -151,8 +146,8 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::UnitWeights) similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts) -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(size(X,1))) +cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights=uweights(size(X,1))) = + DensePredChol(X, pivot, wts) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -170,7 +165,7 @@ end function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, 
r::Vector{T}) where T<:BlasReal X = mul!(p.scratchm1, Diagonal(p.wts), p.X) - ldiv!(p.chol, mul!(p.delbeta, transpose(X),r)) + ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal @@ -225,7 +220,7 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Ve p end -mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred +mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights} <: GLM.LinPred X::M # model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients @@ -248,8 +243,8 @@ function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T wts) end -cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1))) -cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts) +cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights=uweights(size(X,1))) = + SparsePredChol(X, wts) function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} scr = mul!(p.scratchm1, Diagonal(wt), p.X) @@ -350,7 +345,7 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted=false) +function modelmatrix(obj::LinPredModel; weighted::Bool=false) if !weighted obj.pp.X elseif isweighted(obj) @@ -365,7 +360,7 @@ response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu predict(mm::LinPredModel) = fitted(mm) StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula -residuals(obj::LinPredModel; kwarg...) = residuals(obj.rr; kwarg...) 
+residuals(obj::LinPredModel; weighted::Bool=false) = residuals(obj.rr; weighted=weighted) weights(obj::RegressionModel) = weights(obj.model) weights(obj::LinPredModel) = weights(obj.rr) diff --git a/src/lm.jl b/src/lm.jl index bd02d0fa..b5109a73 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -7,12 +7,12 @@ Encapsulates the response for a linear model - `mu`: current value of the mean response vector or fitted value - `offset`: optional offset added to the linear predictor to form `mu` -- `wts`: optional weights for observations (AbstractWeights) +- `wts`: optional weights for observations (as `AbstractWeights`) - `y`: observed response vector Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector, W<:AbstractWeights{<:Real}} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector, W<:AbstractWeights} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) wts::W # prior weights (may have length 0) @@ -28,20 +28,20 @@ mutable struct LmResp{V<:FPVector, W<:AbstractWeights{<:Real}} <: ModResp # res end end -function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights{<:Real}) +function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y) end -function updateμ!(r::LmResp{V, W}, linPr::V) where {V<:FPVector, W} +function updateμ!(r::LmResp{V}, linPr::V) where {V<:FPVector} n = length(linPr) length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))") length(r.offset) == 0 ? 
copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset) deviance(r) end -updateμ!(r::LmResp{V, W}, linPr) where {V<:FPVector, W} = updateμ!(r, convert(V, vec(linPr))) +updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr))) function deviance(r::LmResp{T,<:UnitWeights}) where T y = r.y @@ -132,15 +132,16 @@ const FIT_LM_DOC = """ in columns (including if appropriate the intercept), and `y` must be a vector holding values of the dependent variable. - The keyword argument `wts` can be an `AbstractWeights` specifying frequency weights for observations. - Weights allowed are: + The keyword argument `wts` can be an `AbstractWeights` vector specifying prior weights + for observations. Allowed types are: + - `UnitWeights`: no weighting (all weights equal to 1). - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1) - for each observation. - - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed. + for each observation. + - `FrequencyWeights`: describe the number of times (or frequency) each observation was seen. - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, - providing a correction mechanism for under- or over-sampling certain population groups - These weights gives equal point estimates but different standard errors. - If a vector is passed (deprecated), it is coerced to `FrequencyWeights`. + providing a correction mechanism for under- or over-sampling certain population groups. + These weights give equal point estimates but different standard errors. + If a non-`AbstractWeights` vector is passed (deprecated), it is coerced to `FrequencyWeights`. `dropcollinear` controls whether or not `lm` accepts a model matrix which is less-than-full rank. 
If `true` (the default), only the first of each set of @@ -171,12 +172,12 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * - "by coercing wts to `FrequencyWeights`", :fit) + Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) else - throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) + throw(ArgumentError("`wts` should be an `AbstractVector` coercible to `AbstractWeights`")) end fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts))) end diff --git a/test/runtests.jl b/test/runtests.jl index 00c2cdc5..21ebe068 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -83,7 +83,6 @@ end end @testset "linear model with weights" begin - df = dataset("quantreg", "engel") N = nrow(df) df.weights = repeat(1:5, Int(N/5)) From 6af3ca5fa9ed5c1b3ff0fb87429277c54c59a0c4 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 5 Sep 2022 19:31:08 +0200 Subject: [PATCH 022/106] Throw error if GlmResp are not AbastractWeights --- src/glmfit.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/glmfit.jl b/src/glmfit.jl index 4190accf..27bdda57 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -35,6 +35,11 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec # Check y values checky(y, d) + ## We don't support custom types of weights that a user may define + if !(wts isa AbstractWeights) + throw(ArgumentError("`wts` should be an AbstractWeights but was $W")) + end + # Lengths of y, η, and η all need to be n if !(nη == nμ == n) 
throw(DimensionMismatch("lengths of η, μ, and y ($nη, $nμ, $n) are not equal")) From 0ded1d4cea27dc8fa0b09bd95dd5009b73c46294 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 5 Sep 2022 20:15:19 +0200 Subject: [PATCH 023/106] Addressing review comments --- src/glmfit.jl | 25 ++----------------------- src/linpred.jl | 16 +++++++++------- src/lm.jl | 21 +++++++++++++++------ 3 files changed, 26 insertions(+), 36 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 27bdda57..23649e51 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -39,7 +39,7 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec if !(wts isa AbstractWeights) throw(ArgumentError("`wts` should be an AbstractWeights but was $W")) end - + # Lengths of y, η, and η all need to be n if !(nη == nμ == n) throw(DimensionMismatch("lengths of η, μ, and y ($nη, $nμ, $n) are not equal")) @@ -767,20 +767,8 @@ function checky(y, d::Binomial) return nothing end -""" - nobs(obj::LinearModel) - nobs(obj::GLM) - -For linear and generalized linear models, returns the number of rows, or, -when prior weights of type FrequencyWeights are specified, the sum of weights. -""" -nobs(obj::LinPredModel) = nobs(obj.rr) - -nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) -nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum - +nobs(r::GlmResp) = oftype(sum(one(eltype(r.wts))), length(r.y)) nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum -nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y)) ## To be reviewed! 
Base.sqrt(::UnitWeights{T}) where T = one(T) @@ -815,12 +803,3 @@ function momentmatrix(m::GeneralizedLinearModel) return (X .* r) ./ d end -function momentmatrix(m::LinearModel) - X = modelmatrix(m; weighted=false) - r = residuals(m; weighted=false) - if isweighted(m) - return X .* r .* weights(m) - else - return X .* r - end -end diff --git a/src/linpred.jl b/src/linpred.jl index c9270012..87c21eaa 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -59,7 +59,7 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights} <: DensePred qrX = qr(X) new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, size(X,1))) end - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}} + function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) length(wts) == n || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) @@ -161,11 +161,13 @@ cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U' function delbeta!(p::DensePredChol{T,<:Cholesky, <:UnitWeights}, r::Vector{T}) where T<:BlasReal ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) + p end function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, r::Vector{T}) where T<:BlasReal X = mul!(p.scratchm1, Diagonal(p.wts), p.X) ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) + p end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal @@ -303,7 +305,7 @@ end _vcov(pp::LinPred, u, d) = rmul!(invchol(pp), d) -function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}, u, d) where {T} +function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}, u::AbstractVector, d::Real) where {T} wts = pp.wts Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) XtW2X = Z'Z @@ -314,7 +316,7 @@ function _vcov(pp::DensePredChol{T, 
<:ProbabilityWeights, <:Cholesky}, u, d) whe n/(n-k)*V end -function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}, u) where {T} +function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}, u::AbstractVector) where {T} wts = pp.wts Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) rnk = rank(pp.chol) @@ -346,12 +348,10 @@ end modelframe(obj::LinPredModel) = obj.fr function modelmatrix(obj::LinPredModel; weighted::Bool=false) - if !weighted - obj.pp.X - elseif isweighted(obj) + if isweighted(obj) mul!(obj.pp.scratchm1, Diagonal(sqrt.(obj.pp.wts)), obj.pp.X) else - throw(ArgumentError("`weighted=true` allowed only for weighted models.")) + obj.pp.X end end @@ -362,6 +362,8 @@ predict(mm::LinPredModel) = fitted(mm) StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula residuals(obj::LinPredModel; weighted::Bool=false) = residuals(obj.rr; weighted=weighted) +nobs(obj::LinPredModel) = nobs(obj.rr) + weights(obj::RegressionModel) = weights(obj.model) weights(obj::LinPredModel) = weights(obj.rr) diff --git a/src/lm.jl b/src/lm.jl index b5109a73..db53f976 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -66,6 +66,9 @@ end weights(r::LmResp) = r.wts +nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum +nobs(r::LmResp) = oftype(sum(one(eltype(r.wts))), length(r.y)) + function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T n = nobs(r) -n/2 * (log(2π * deviance(r)/n) + 1) @@ -241,12 +244,7 @@ function nulldeviance(obj::LinearModel) end loglikelihood(obj::LinearModel) = loglikelihood(obj.rr) - -function nullloglikelihood(obj::LinearModel) - r = obj.rr - n = nobs(r) - -n/2 * (log(2π * nulldeviance(obj)/n) + 1) -end +nullloglikelihood(obj::LinearModel) = nullloglikelihood(obj.rr) r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) adjr2(obj::LinearModel) = 1 - (1 - r²(obj))*(nobs(obj)-hasintercept(obj))/dof_residual(obj) @@ -328,6 +326,17 @@ function confint(obj::LinearModel; 
level::Real=0.95) quantile(TDist(dof_residual(obj)), (1. - level)/2.) * [1. -1.] end + +function momentmatrix(m::LinearModel) + X = modelmatrix(m; weighted=false) + r = residuals(m; weighted=false) + if isweighted(m) + return X .* r .* weights(m) + else + return X .* r + end +end + """ cooksdistance(obj::LinearModel) From d923e48b387c5e1f252481e4ec23eec68a43a901 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 5 Sep 2022 20:16:47 +0200 Subject: [PATCH 024/106] Reexport aweights, pweights, fweights --- src/GLM.jl | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index c2f3d76d..fa7c1010 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -19,7 +19,8 @@ module GLM export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², - cooksdistance, hasintercept, dispersion, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights + cooksdistance, hasintercept, dispersion, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights, + aweights, fweights, pweights export # types @@ -52,17 +53,17 @@ module GLM LinearModel, # functions - canonicallink, # canonical link function for a distribution - deviance, # deviance of fitted and observed responses - devresid, # vector of squared deviance residuals - formula, # extract the formula from a model - glm, # general interface - linpred, # linear predictor - lm, # linear model - negbin, # interface to fitting negative binomial regression - nobs, # total number of observations - predict, # make predictions - ftest # compare models with an F test + canonicallink, # canonical link function for a distribution + deviance, # deviance of fitted and observed responses + devresid, # vector of squared deviance residuals + formula, # extract the formula from a model + glm, # general interface + 
linpred, # linear predictor + lm, # linear model + negbin, # interface to fitting negative binomial regression + nobs, # total number of observations + predict, # make predictions + ftest # compare models with an F test const FP = AbstractFloat const FPVector{T<:FP} = AbstractArray{T,1} From 84f27d1eb3b93209663097b54c795fc9880fd7cf Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 6 Sep 2022 18:34:15 +0200 Subject: [PATCH 025/106] Fixed remaining issues with null loglikelihood --- src/glmfit.jl | 71 ++++++++++++++++++++++++++++++------------------- src/glmtools.jl | 16 +++++------ src/linpred.jl | 21 +++------------ src/lm.jl | 31 ++++++++++----------- 4 files changed, 72 insertions(+), 67 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 23649e51..d6581d67 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -268,14 +268,14 @@ deviance(m::AbstractGLM) = deviance(m.rr) function nulldeviance(m::GeneralizedLinearModel) r = m.rr - wts = r.wts + wts = weights(r) y = r.y d = r.d offset = r.offset hasint = hasintercept(m) - dev = zero(eltype(y)) - if isempty(offset) # Faster method - if !(wts isa UnitWeights) + dev = zero(eltype(y)) + if isempty(offset) # Faster method + if isweighted(m) mu = hasint ? 
mean(y, wts) : linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1) @@ -319,7 +319,7 @@ function loglikelihood(r::GlmResp{T,D,L,<:FrequencyWeights}) where {T,D,L} mu = r.mu d = r.d ll = zero(eltype(mu)) - ϕ = deviance(r)/nobs(r) + ϕ = deviance(r)/nobs(r) @inbounds for i in eachindex(y, mu, wts) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end @@ -329,33 +329,42 @@ end function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} wts = r.wts sumwt = sum(wts) - y = r.y - mu = r.mu - d = r.d - ll = zero(eltype(mu)) + y = r.y + mu = r.mu + d = r.d + ll = zero(eltype(mu)) ϕ = deviance(r) n = length(y) @inbounds for i in eachindex(y, mu, wts) - ll += loglik_aweights_obs(d, y[i], mu[i], wts[i], ϕ, sumwt, n) + ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], ϕ, sumwt, n) end return ll end function nullloglikelihood(m::GeneralizedLinearModel) r = m.rr - wts = r.wts + wts = weights(m) + sumwt = sum(wts) y = r.y d = r.d offset = r.offset hasint = hasintercept(m) - ll = zero(eltype(y)) + ll = zero(eltype(y)) if isempty(r.offset) # Faster method - if !isempty(wts) - mu = hasint ? mean(y, weights(wts)) : linkinv(r.link, zero(ll)/1) - ϕ = nulldeviance(m)/sum(wts) - @inbounds for i in eachindex(y, wts) - ll += loglik_obs(d, y[i], mu, wts[i], ϕ) - end + if isweighted(m) + mu = hasint ? mean(y, wts) : linkinv(r.link, zero(ll)/1) + if wts isa FrequencyWeights + ϕ = nulldeviance(m)/nobs(m) + @inbounds for i in eachindex(y, wts) + ll += loglik_obs(d, y[i], mu, wts[i], ϕ) + end + else + ϕ = nulldeviance(m) + n = length(y) + @inbounds for i in eachindex(y, wts) + ll += loglik_apweights_obs(d, y[i], mu, wts[i], ϕ, sumwt, n) + end + end else mu = hasint ? 
mean(y) : linkinv(r.link, zero(ll)/1) ϕ = nulldeviance(m)/length(y) @@ -593,7 +602,7 @@ function fit(::Type{M}, _wts = if isa(wts, AbstractWeights) wts elseif isa(wts, AbstractVector) - Base.depwarn("Passing weights as vector is deprecated in favor of explicitely using " * + Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) @@ -767,11 +776,11 @@ function checky(y, d::Binomial) return nothing end -nobs(r::GlmResp) = oftype(sum(one(eltype(r.wts))), length(r.y)) -nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = r.wts.sum +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts) -## To be reviewed! -Base.sqrt(::UnitWeights{T}) where T = one(T) +##To be reviewed! +# Base.sqrt(::UnitWeights{T}) where T = one(T) function residuals(r::GlmResp; weighted::Bool=false) y, η, μ = r.y, r.eta, r.mu @@ -790,16 +799,24 @@ function residuals(r::GlmResp; weighted::Bool=false) return dres end -mdisp(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}) = 1 -mdisp(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}) = - sum(abs2, Base.Broadcast.broadcasted(*, rr.wrkwt, rr.wrkresid))/sum(rr.wrkwt) + +## To be removed once is merged momentmatrix(m::RegressionModel) = momentmatrix(m.model) +""" + momentmatrix(m::GeneralizedLinearModel) + + Return the moment matrix (score equation) of a GLM model. 
+""" function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) - d = mdisp(m.rr) + d = variancestructure(m.rr) r = m.rr.wrkwt .* m.rr.wrkresid return (X .* r) ./ d end +variancestructure(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}) = 1 +variancestructure(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}) = + sum(abs2, Base.Broadcast.broadcasted(*, rr.wrkwt, rr.wrkresid))/sum(rr.wrkwt) + diff --git a/src/glmtools.jl b/src/glmtools.jl index d25a02d0..9eb1bacf 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -534,11 +534,11 @@ loglik_obs(d::NegativeBinomial, y, μ, wt, ϕ) = wt*logpdf(NegativeBinomial(d.r, ## ϕ is the deviance - not the deviance/n nor sum(wt) ## sumwt is sum(wt) ## n is the number of observations -loglik_aweights_obs(::Bernoulli, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) -loglik_aweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) -loglik_aweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Gamma(inv(ϕ/sumwt), μ*ϕ/sumwt), y) -loglik_aweights_obs(::Geometric, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Geometric(1 / (μ + 1)), y) -loglik_aweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) = -(wt*(1 + log(2π*(ϕ/sumwt))) + 3*log(y)*wt)/2 -loglik_aweights_obs(::Normal, y, μ, wt, ϕ, sumwt, n) = ((-log(2π*ϕ/n) - 1) + log(wt))/2 -loglik_aweights_obs(::Poisson, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Poisson(μ), y) -loglik_aweights_obs(d::NegativeBinomial, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) +loglik_apweights_obs(::Bernoulli, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) +loglik_apweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) +loglik_apweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Gamma(inv(ϕ/sumwt), μ*ϕ/sumwt), y) +loglik_apweights_obs(::Geometric, y, μ, wt, ϕ, sumwt, n) = 
wt*logpdf(Geometric(1 / (μ + 1)), y) +loglik_apweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) = -(wt*(1 + log(2π*(ϕ/sumwt))) + 3*log(y)*wt)/2 +loglik_apweights_obs(::Normal, y, μ, wt, ϕ, sumwt, n) = ((-log(2π*ϕ/n) - 1) + log(wt))/2 +loglik_apweights_obs(::Poisson, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Poisson(μ), y) +loglik_apweights_obs(d::NegativeBinomial, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) diff --git a/src/linpred.jl b/src/linpred.jl index 87c21eaa..4a72bb37 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -52,30 +52,16 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights} <: DensePred scratchbeta::Vector{T} qr::QRCompactWY{T} wts::W - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:UnitWeights} + function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) length(wts) == n || throw(DimensionMismatch("Length of weights does not match the dimension of X")) qrX = qr(X) - new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, size(X,1))) - end - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights} - n, p = size(X) - length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) - length(wts) == n || throw(DimensionMismatch("Lenght of weights does not match the dimension of X")) - - qrX = qr(Diagonal(sqrt.(wts))*X) - new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),) )) - end - - function DensePredQR{T}(X::Matrix{T}, wts::W=uweights(size(X,1))) where {T,W} - n, p = size(X) - DensePredQR(X, zeros(T, p), wts) + new{T,W}(X, beta0, zeros(T,p), zeros(T,p), qrX, wts) end end DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1))) -DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts) 
-DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts) +#DensePredQR(X::Matrix{T}, wts::AbstractWeights) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X) """ @@ -119,6 +105,7 @@ end function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) scr = similar(X) mul!(scr, Diagonal(wts), X) + F = Hermitian(float(scr'X)) T = eltype(F) F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) diff --git a/src/lm.jl b/src/lm.jl index db53f976..836311a7 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -66,8 +66,8 @@ end weights(r::LmResp) = r.wts -nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} = r.wts.sum -nobs(r::LmResp) = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::LmResp{V,W}) where {V<:AbstractVector{T} where T<:AbstractFloat,W<:FrequencyWeights} = sum(r.wts) +nobs(r::LmResp{V,W}) where {V<:AbstractVector{T} where T<:AbstractFloat,W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T n = nobs(r) @@ -75,22 +75,11 @@ function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) wher end function loglikelihood(r::LmResp{T,<:AbstractWeights}) where T - N = nobs(r) + N = length(r.y) n = sum(log.(weights(r))) 0.5*(n - N * (log(2π * deviance(r)/N) + 1)) end -function nullloglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T - n = nobs(r) - -n/2 * (log(2π * nulldeviance(r)/n) + 1) -end - -function nullloglikelihood(r::LmResp{T,<:AbstractWeights}) where T - N = nobs(r) - n = sum(log.(weights(r))) - 0.5*(n - N * (log(2π * nulldeviance(r)/N) + 1)) -end - function residuals(r::LmResp; weighted=false) wts = weights(r) res = r.y - r.mu @@ -243,8 +232,20 @@ function nulldeviance(obj::LinearModel) v end +function nullloglikelihood(m::LinearModel) + wts = weights(m) + if wts isa 
Union{UnitWeights, FrequencyWeights} + n = nobs(m) + -n/2 * (log(2π * nulldeviance(m)/n) + 1) + else + N = length(m.rr.y) + n = sum(log.(wts)) + 0.5*(n - N * (log(2π * nulldeviance(m)/N) + 1)) + end +end + loglikelihood(obj::LinearModel) = loglikelihood(obj.rr) -nullloglikelihood(obj::LinearModel) = nullloglikelihood(obj.rr) + r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) adjr2(obj::LinearModel) = 1 - (1 - r²(obj))*(nobs(obj)-hasintercept(obj))/dof_residual(obj) From 8804dc15e4417a52645cffb9af294daf5cda4f85 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 6 Sep 2022 18:34:46 +0200 Subject: [PATCH 026/106] Fix nullloglikelihood tests --- test/runtests.jl | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 21ebe068..4ce330b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -107,9 +107,6 @@ end @test isa(weights(lm_model), FrequencyWeights) @test isa(weights(glm_model), FrequencyWeights) - - - lm_model = lm(f, df, wts = aweights(df.weights)) glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @@ -122,9 +119,9 @@ end @test isapprox(first(predict(lm_model)), 357.57694841780994) @test isapprox(loglikelihood(lm_model), -1467.8964643217373) @test isapprox(loglikelihood(glm_model), -1467.8964643217373) - @test isapprox(nullloglikelihood(lm_model), -1790.7176571556527) - #@test isapprox(nullloglikelihood(glm_model), -1790.7176571556527) - @test isapprox(nullloglikelihood(lm_model), -1790.7176571556527) + @test isapprox(nullloglikelihood(lm_model), -1678.2116012002746) + @test isapprox(nullloglikelihood(glm_model), -1678.2116012002746) + @test isapprox(nullloglikelihood(lm_model), -1678.2116012002746) @test isapprox(mean(residuals(lm_model)), -5.412966629787718) end @@ -169,12 +166,12 @@ end @testset "Passing wts (depwarn)" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = 
[3,3,3]) - @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * - "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts) - @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitely using " * - "AnalyticalWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " * - "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), df, Normal(), IdentityLink(); wts=df.wts) + @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts) + @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`") glm(@formula(y~x), df, Normal(), IdentityLink(); wts=df.wts) end @testset "saturated linear model" begin From 7f3aa36478200171013dd16621311dd928d4dfce Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 6 Sep 2022 19:33:36 +0200 Subject: [PATCH 027/106] Do not dispatch on Weights but use if --- src/glmfit.jl | 76 ++++++++++++++++++--------------------------------- src/lm.jl | 14 +++++----- 2 files changed, 33 insertions(+), 57 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index d6581d67..a6abf84d 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -301,44 +301,28 @@ end loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) -function loglikelihood(r::GlmResp{T,D,L,<:UnitWeights}) where {T,D,L} - y = r.y - mu = r.mu - d = r.d - ll = zero(eltype(mu)) - ϕ = deviance(r)/nobs(r) - @inbounds for i in eachindex(y, mu) - ll += loglik_obs(d, y[i], mu[i], 1, ϕ) - end - return ll -end - -function loglikelihood(r::GlmResp{T,D,L,<:FrequencyWeights}) where {T,D,L} - 
wts = r.wts - y = r.y - mu = r.mu - d = r.d - ll = zero(eltype(mu)) - ϕ = deviance(r)/nobs(r) - @inbounds for i in eachindex(y, mu, wts) - ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) - end - return ll -end function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} - wts = r.wts - sumwt = sum(wts) y = r.y mu = r.mu + wts = weights(r) + sumwt = sum(wts) d = r.d ll = zero(eltype(mu)) - ϕ = deviance(r) - n = length(y) - @inbounds for i in eachindex(y, mu, wts) - ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], ϕ, sumwt, n) + n = nobs(r) + N = length(y) + δ = deviance(r) + ϕ = δ/n + if wts isa FrequencyWeights || wts isa UnitWeights + @inbounds for i in eachindex(y, mu) + ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) + end + else + @inbounds for i in eachindex(y, mu, wts) + ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, sumwt, N) + end end - return ll + return ll end function nullloglikelihood(m::GeneralizedLinearModel) @@ -351,26 +335,18 @@ function nullloglikelihood(m::GeneralizedLinearModel) hasint = hasintercept(m) ll = zero(eltype(y)) if isempty(r.offset) # Faster method - if isweighted(m) - mu = hasint ? mean(y, wts) : linkinv(r.link, zero(ll)/1) - if wts isa FrequencyWeights - ϕ = nulldeviance(m)/nobs(m) - @inbounds for i in eachindex(y, wts) - ll += loglik_obs(d, y[i], mu, wts[i], ϕ) - end - else - ϕ = nulldeviance(m) - n = length(y) - @inbounds for i in eachindex(y, wts) - ll += loglik_apweights_obs(d, y[i], mu, wts[i], ϕ, sumwt, n) - end - end - else - mu = hasint ? mean(y) : linkinv(r.link, zero(ll)/1) - ϕ = nulldeviance(m)/length(y) - @inbounds for i in eachindex(y) - ll += loglik_obs(d, y[i], mu, 1, ϕ) + mu = hasint ? 
mean(y, wts) : linkinv(r.link, zero(ll)/1) + δ = nulldeviance(m) + ϕ = nulldeviance(m)/nobs(m) + N = length(y) + if wts isa FrequencyWeights || wts isa UnitWeights + @inbounds for i in eachindex(y, wts) + ll += loglik_obs(d, y[i], mu, wts[i], ϕ) end + else + @inbounds for i in eachindex(y, wts) + ll += loglik_apweights_obs(d, y[i], mu, wts[i], δ, sumwt, N) + end end else X = fill(1.0, length(y), hasint ? 1 : 0) diff --git a/src/lm.jl b/src/lm.jl index 836311a7..142d467a 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -220,16 +220,16 @@ function nulldeviance(obj::LinearModel) end v = zero(eltype(y))*zero(eltype(wts)) - if !isweighted(obj) - @inbounds @simd for yi in y - v += abs2(yi - m) - end - else + # if !isweighted(obj) + # @inbounds @simd for yi in y + # v += abs2(yi - m) + # end + # else @inbounds @simd for i = eachindex(y,wts) v += abs2(y[i] - m)*wts[i] end - end - v + #end + return v end function nullloglikelihood(m::LinearModel) From f67a8e0f2e7f12663413e4bc2fc6352f89fef6d1 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 6 Sep 2022 19:34:38 +0200 Subject: [PATCH 028/106] Do not dispatch on Weights use if --- src/lm.jl | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/src/lm.jl b/src/lm.jl index 142d467a..c3226519 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -207,12 +207,9 @@ For linear models, the deviance of the null model is equal to the total sum of s function nulldeviance(obj::LinearModel) y = obj.rr.y wts = weights(obj) + if hasintercept(obj) - if !isweighted(obj) - m = mean(y) - else - m = mean(y, wts) - end + m = mean(y, wts) else @warn("Starting from GLM.jl 1.8, null model is defined as having no predictor at all " * "when a model without an intercept is passed.") @@ -220,15 +217,9 @@ function nulldeviance(obj::LinearModel) end v = zero(eltype(y))*zero(eltype(wts)) - # if !isweighted(obj) - # @inbounds @simd for yi in y - # v += abs2(yi - m) - # end - # else - @inbounds @simd for i = eachindex(y,wts) - v += 
abs2(y[i] - m)*wts[i] - end - #end + @inbounds @simd for i = eachindex(y,wts) + v += abs2(y[i] - m)*wts[i] + end return v end From 23a3e8771e2d8d344a89ec860503a76506ba9d1c Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 6 Sep 2022 19:35:06 +0200 Subject: [PATCH 029/106] Fix inferred test --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 4ce330b0..a4b708b9 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1304,7 +1304,7 @@ end end @testset "Issue 118" begin - @inferred nobs(lm(randn(10, 2), randn(10))) + Test.@inferred nobs(lm(randn(10, 2), randn(10))) end @testset "Issue 153" begin From 54812841cc5d543df9bf700ea5144d201d21e75e Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 6 Sep 2022 19:56:13 +0200 Subject: [PATCH 030/106] Use if instead of dispatching on Weights --- src/glmfit.jl | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index a6abf84d..934f6a75 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -102,15 +102,11 @@ function updateμ! end function updateμ!(r::GlmResp{T,D,L,<:AbstractWeights}, linPr::T) where {T<:FPVector,D,L} isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) updateμ!(r) - map!(*, r.devresid, r.devresid, r.wts) - map!(*, r.wrkwt, r.wrkwt, r.wts) - r -end - -function updateμ!(r::GlmResp{T,D,L,<:UnitWeights}, linPr::T) where {T<:FPVector,D,L} - isempty(r.offset) ? 
copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) - updateμ!(r) - r + if !(weights(r) isa UnitWeights) + map!(*, r.devresid, r.devresid, r.wts) + map!(*, r.wrkwt, r.wrkwt, r.wts) + end + return r end function updateμ!(r::GlmResp{V,D,L}) where {V<:FPVector,D,L} From d12222e2eb4e8c4fde979eae4de88d0bd4294f47 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 12:12:17 +0200 Subject: [PATCH 031/106] Add doc for weights and fix output --- docs/src/api.md | 4 +-- docs/src/examples.md | 16 ++++----- docs/src/index.md | 86 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 94 insertions(+), 12 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 46990470..fc81d786 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -25,7 +25,7 @@ The most general approach to fitting a model is with the `fit` function, as in julia> using Random julia> fit(LinearModel, hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10)) -LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}: +LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}: Coefficients: ──────────────────────────────────────────────────────────────── @@ -41,7 +41,7 @@ This model can also be fit as julia> using Random julia> lm(hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10)) -LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}: +LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}: Coefficients: ──────────────────────────────────────────────────────────────── diff --git a/docs/src/examples.md b/docs/src/examples.md index 54de17d6..ba8bba38 100644 --- a/docs/src/examples.md 
+++ b/docs/src/examples.md @@ -20,7 +20,7 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]) 3 │ 3 7 julia> ols = lm(@formula(Y ~ X), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -99,7 +99,7 @@ julia> data = DataFrame(X=[1,2,2], Y=[1,0,1]) 3 │ 2 1 julia> probit = glm(@formula(Y ~ X), data, Binomial(), ProbitLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -140,7 +140,7 @@ julia> quine = dataset("MASS", "quine") 131 rows omitted julia> nbrmodel = glm(@formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Days ~ 1 + Eth + Sex + Age + Lrn @@ -158,7 +158,7 @@ Lrn: SL 0.296768 0.185934 1.60 0.1105 -0.0676559 0.661191 
──────────────────────────────────────────────────────────────────────────── julia> nbrmodel = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Days ~ 1 + Eth + Sex + Age + Lrn @@ -207,7 +207,7 @@ julia> form = dataset("datasets", "Formaldehyde") 6 │ 0.9 0.782 julia> lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} OptDen ~ 1 + Carb @@ -256,7 +256,7 @@ julia> LifeCycleSavings = dataset("datasets", "LifeCycleSavings") 35 rows omitted julia> fm2 = fit(LinearModel, @formula(SR ~ Pop15 + Pop75 + DPI + DDPI), LifeCycleSavings) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} SR ~ 1 + Pop15 + Pop75 + DPI + DDPI @@ -364,7 +364,7 @@ julia> dobson = DataFrame(Counts = [18.,17,15,20,10,21,25,13,13], 9 │ 13.0 3 3 
julia> gm1 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Poisson{Float64}, LogLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Poisson{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Counts ~ 1 + Outcome + Treatment @@ -421,7 +421,7 @@ julia> round(optimal_bic.minimizer, digits = 5) # Optimal λ 0.40935 julia> glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(optimal_bic.minimizer)) # Best model -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, PowerLink}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Normal{Float64}, PowerLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Volume ~ 1 + Height + Girth diff --git a/docs/src/index.md b/docs/src/index.md index fb62a0dc..a6760551 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -85,7 +85,7 @@ julia> data = DataFrame(y = rand(rng, 100), x = categorical(repeat([1, 2, 3, 4], julia> lm(@formula(y ~ x), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -108,7 +108,7 @@ julia> using 
StableRNGs julia> data = DataFrame(y = rand(StableRNG(1), 100), x = repeat([1, 2, 3, 4], 25)); julia> lm(@formula(y ~ x), data, contrasts = Dict(:x => DummyCoding())) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -123,6 +123,88 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ─────────────────────────────────────────────────────────────────────────── ``` +## Weighting + +Both `lm` and `glm` allow weighted estimation. The three different [types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: + +- `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for each observation. These weights may also be referred to as reliability weights, precision weights or inverse variance weights. These are typically used when the observations being weighted are aggregate values (e.g., averages) with differing variances. +- `FrequencyWeights` describe the number of times (or frequency) each observation was observed. These weights may also be referred to as case weights or repeat weights. +- `ProbabilityWeights` describe how the sample can be scaled back to the population. They are usually the reciprocals of the sampling probabilities, providing a correction mechanism for under- or over-sampling certain population groups, and may also be referred to as sampling weights. + +We illustrate the API with random generated data. 
+ +```jldoctest weights +julia> using StableRNGs + +julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25), ); + + +julia> m = lm(@formula(y ~ x), data) +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.517369 0.0280232 18.46 <1e-32 0.461758 0.57298 +x -0.0500249 0.0307201 -1.63 0.1066 -0.110988 0.0109382 +────────────────────────────────────────────────────────────────────────── + +julia> m_aweights = lm(@formula(y ~ x), data, wts=aweights(data.weights)) +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. 
Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0270707 19.09 <1e-34 0.463009 0.570451 +x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 +────────────────────────────────────────────────────────────────────────── + +julia> m_fweights = lm(@formula(y ~ x), data, wts=fweights(data.weights)) +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} + +y ~ 1 + x + +Coefficients: +───────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +───────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0170172 30.37 <1e-84 0.483213 0.550246 +x -0.0478667 0.0193863 -2.47 0.0142 -0.0860494 -0.00968394 +───────────────────────────────────────────────────────────────────────────── + +julia> m_pweights = lm(@formula(y ~ x), data, wts=pweights(data.weights)) +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. 
Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0270707 19.09 <1e-34 0.463009 0.570451 +x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 +────────────────────────────────────────────────────────────────────────── +``` + +The type of the weights selected will affect the variance of the estimated coefficients and calculations involving the variance such as `likelihood`, `deviance`, `nulllikelihood`, `nulldeviance`. The fit of the model is the same regardless of the type of weights. + +```jldoctest +julia> loglikelihood(m_aweights) +-16.29630756138424 + +julia> loglikelihood(m_fweights) +-25.51860961756451 + +julia> loglikelihood(m_pweights) +-16.29630756138424 +``` + ## Comparing models with F-test Comparisons between two or more linear models can be performed using the `ftest` function, From a17e8128416a5330316a7314936bb4751da526ba Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 12:47:00 +0200 Subject: [PATCH 032/106] Fix docs failures --- docs/src/examples.md | 14 +++++++------- docs/src/index.md | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index ba8bba38..4db425dc 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -20,7 +20,7 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]) 3 │ 3 7 julia> ols = lm(@formula(Y ~ X), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -61,7 +61,7 @@ julia> dof(ols) 3 julia> 
dof_residual(ols) -1.0 +1 julia> round(aic(ols); digits=5) 5.84252 @@ -99,7 +99,7 @@ julia> data = DataFrame(X=[1,2,2], Y=[1,0,1]) 3 │ 2 1 julia> probit = glm(@formula(Y ~ X), data, Binomial(), ProbitLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -140,7 +140,7 @@ julia> quine = dataset("MASS", "quine") 131 rows omitted julia> nbrmodel = glm(@formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} Days ~ 1 + Eth + Sex + Age + Lrn @@ -158,7 +158,7 @@ Lrn: SL 0.296768 0.185934 1.60 0.1105 -0.0676559 0.661191 ──────────────────────────────────────────────────────────────────────────── julia> nbrmodel = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} 
+StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} Days ~ 1 + Eth + Sex + Age + Lrn @@ -207,7 +207,7 @@ julia> form = dataset("datasets", "Formaldehyde") 6 │ 0.9 0.782 julia> lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} OptDen ~ 1 + Carb @@ -256,7 +256,7 @@ julia> LifeCycleSavings = dataset("datasets", "LifeCycleSavings") 35 rows omitted julia> fm2 = fit(LinearModel, @formula(SR ~ Pop15 + Pop75 + DPI + DDPI), LifeCycleSavings) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} SR ~ 1 + Pop15 + Pop75 + DPI + DDPI diff --git a/docs/src/index.md b/docs/src/index.md index a6760551..9a6309bb 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -85,7 +85,7 @@ julia> data = DataFrame(y = rand(rng, 100), x = categorical(repeat([1, 2, 3, 4], julia> lm(@formula(y ~ x), data) 
-StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -108,7 +108,7 @@ julia> using StableRNGs julia> data = DataFrame(y = rand(StableRNG(1), 100), x = repeat([1, 2, 3, 4], 25)); julia> lm(@formula(y ~ x), data, contrasts = Dict(:x => DummyCoding())) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -134,13 +134,13 @@ Both `lm` and `glm` allow weighted estimation. The three different [types of wei We illustrate the API with random generated data. 
```jldoctest weights -julia> using StableRNGs +julia> using StableRNGs, DataFrames, GLM julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25), ); julia> m = lm(@formula(y ~ x), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -153,7 +153,7 @@ x -0.0500249 0.0307201 -1.63 0.1066 -0.110988 0.0109382 ────────────────────────────────────────────────────────────────────────── julia> m_aweights = lm(@formula(y ~ x), data, wts=aweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x @@ -166,7 +166,7 @@ x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 ────────────────────────────────────────────────────────────────────────── julia> m_fweights = lm(@formula(y ~ x), data, wts=fweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, 
Vector{Int64}}, FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x @@ -194,7 +194,7 @@ x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 The type of the weights selected will affect the variance of the estimated coefficients and calculations involving the variance such as `likelihood`, `deviance`, `nulllikelihood`, `nulldeviance`. The fit of the model is the same regardless of the type of weights. -```jldoctest +```jldoctest weights julia> loglikelihood(m_aweights) -16.29630756138424 From 58dec0c0a6de0aa7fa6c701978512ad12fa7c4a2 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 16:44:37 +0200 Subject: [PATCH 033/106] Fix pweights stderror even for rank deficient des --- src/linpred.jl | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 4a72bb37..9ca1ffa7 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -292,7 +292,7 @@ end _vcov(pp::LinPred, u, d) = rmul!(invchol(pp), d) -function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}, u::AbstractVector, d::Real) where {T} +function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) XtW2X = Z'Z @@ -303,21 +303,36 @@ function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:Cholesky}, u::Abstra n/(n-k)*V end -function _vcov(pp::DensePredChol{T, <:ProbabilityWeights, <:CholeskyPivoted}, u::AbstractVector) where {T} +function nancolidx(A::AbstractMatrix) + ## Return the columns without missing values + allnanidx = findall(map(x->all(isnan.(x)), eachcol(A))) 
+ nonnanidx = setdiff(axes(A, 2), allnanidx) + return (allnanidx, nonnanidx) +end + +function _vcov(pp::DensePredChol{T, <:CholeskyPivoted, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) - rnk = rank(pp.chol) + ch = pp.chol + rnk = rank(ch) p = length(pp.delbeta) + invXtWX = invchol(pp) if rnk == p - XtW2X = Z'Z + B = Z'Z + A = invXtWX + V = A*B*A else - ## no idea + idx_nan, idx_non = nancolidx(invXtWX) + Zc = view(Z, :, idx_non) + B = Zc'Zc + A = view(invXtWX, idx_non, idx_non) + V = similar(pp.scratchm2) + V[idx_non, idx_non] = A*B*A + V[idx_nan, :] .= convert(T, NaN) + V[:, idx_nan] .= convert(T, NaN) end - invXtWX = invchol(pp) - V = invXtWX*XtW2X*invXtWX n = length(wts) - k = length(pp.delbeta) - n/(n-k)*V + n/(n-rnk)*V end function cor(x::LinPredModel) From a6f5c668fe2f0c33f37b929463db523ebbec8206 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 16:44:57 +0200 Subject: [PATCH 034/106] Add test for pweights stderror --- test/runtests.jl | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index a4b708b9..9cd09495 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -123,6 +123,28 @@ end @test isapprox(nullloglikelihood(glm_model), -1678.2116012002746) @test isapprox(nullloglikelihood(lm_model), -1678.2116012002746) @test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + lm_model = lm(f, df, wts = pweights(df.weights)) + glm_model = glm(f, df, Normal(), wts = pweights(df.weights)) + @test vcov(lm_model) ≈ [2230.3626444482406 -2.423827176758377; -2.4238271767583766 0.0026792687760410199] + @test vcov(glm_model) ≈ [2230.3626444482406 -2.423827176758377; -2.4238271767583766 0.0026792687760410199] + + ## Test the non full rank case + df.Income2 = df.Income*2 + df.Income3 = df.Income*3 + + f = @formula(FoodExp ~ Income3) + m1 = lm(f, df, wts = pweights(df.weights)) + f = @formula(FoodExp ~ Income 
+ Income2 + Income3) + m2 = lm(f, df, wts = pweights(df.weights)) + @test stderror(m1) ≈ filter(!isnan, stderror(m2)) + + f = @formula(FoodExp ~ Income3+Income^2) + m3 = lm(f, df, wts = pweights(df.weights)) + f = @formula(FoodExp ~ Income + Income2 + Income3+Income^2) + m4 = lm(f, df, wts = pweights(df.weights)) + @test stderror(m3) ≈ filter(!isnan, stderror(m4)) + end @testset "rankdeficient" begin From 92ddb1e5ddcebfca95279c62b01448758244b377 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 17:56:23 +0200 Subject: [PATCH 035/106] Export UnitWeights --- src/GLM.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GLM.jl b/src/GLM.jl index fa7c1010..e5f4b284 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -20,7 +20,7 @@ module GLM loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², cooksdistance, hasintercept, dispersion, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights, - aweights, fweights, pweights + UnitWeights, aweights, fweights, pweights export # types From 0c61fff9f9bda5ea74b0a900a7d632dfdd8e0788 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 18:00:40 +0200 Subject: [PATCH 036/106] Fix documentation --- docs/src/api.md | 22 +++++++++--------- docs/src/examples.md | 53 +++++++++++++------------------------------- docs/src/index.md | 43 +++++++++++++++++++++-------------- 3 files changed, 53 insertions(+), 65 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index fc81d786..8ba6328d 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -2,7 +2,7 @@ ```@meta DocTestSetup = quote - using CategoricalArrays, DataFrames, Distributions, GLM, RDatasets + using CategoricalArrays, DataFrames, Distributions, GLM, RDatasets, StableRNGs end ``` @@ -22,33 +22,33 @@ GLM.ModResp The most general approach to fitting a model is with the `fit` function, as in ```jldoctest -julia> using Random 
+julia> using GLM, StableRNGs -julia> fit(LinearModel, hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10)) -LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}: +julia> fit(LinearModel, hcat(ones(10), 1:10), randn(StableRNG(12321), 10)) +LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}: Coefficients: ──────────────────────────────────────────────────────────────── Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% ──────────────────────────────────────────────────────────────── -x1 0.717436 0.775175 0.93 0.3818 -1.07012 2.50499 -x2 -0.152062 0.124931 -1.22 0.2582 -0.440153 0.136029 +x1 0.361896 0.69896 0.52 0.6186 -1.24991 1.9737 +x2 -0.012125 0.112648 -0.11 0.9169 -0.271891 0.247641 ──────────────────────────────────────────────────────────────── ``` This model can also be fit as ```jldoctest -julia> using Random +julia> using GLM, StableRNGs -julia> lm(hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10)) -LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}: +julia> lm(hcat(ones(10), 1:10), randn(StableRNG(12321), 10)) +LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}: Coefficients: ──────────────────────────────────────────────────────────────── Coef. Std. 
Error t Pr(>|t|) Lower 95% Upper 95% ──────────────────────────────────────────────────────────────── -x1 0.717436 0.775175 0.93 0.3818 -1.07012 2.50499 -x2 -0.152062 0.124931 -1.22 0.2582 -0.440153 0.136029 +x1 0.361896 0.69896 0.52 0.6186 -1.24991 1.9737 +x2 -0.012125 0.112648 -0.11 0.9169 -0.271891 0.247641 ──────────────────────────────────────────────────────────────── ``` diff --git a/docs/src/examples.md b/docs/src/examples.md index 4db425dc..fec181ff 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -12,15 +12,15 @@ julia> using DataFrames, GLM, StatsBase julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]) 3×2 DataFrame - Row │ X Y - │ Int64 Int64 + Row │ X Y + │ Int64 Int64 ─────┼────────────── 1 │ 1 2 2 │ 2 4 3 │ 3 7 julia> ols = lm(@formula(Y ~ X), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -91,15 +91,15 @@ julia> round.(vcov(ols); digits=5) ```jldoctest julia> data = DataFrame(X=[1,2,2], Y=[1,0,1]) 3×2 DataFrame - Row │ X Y - │ Int64 Int64 + Row │ X Y + │ Int64 Int64 ─────┼────────────── 1 │ 1 1 2 │ 2 0 3 │ 2 1 julia> probit = glm(@formula(Y ~ X), data, Binomial(), ProbitLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, Binomial{Float64}, ProbitLink, UnitWeights{Int64}}, 
GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -140,7 +140,7 @@ julia> quine = dataset("MASS", "quine") 131 rows omitted julia> nbrmodel = glm(@formula(Days ~ Eth+Sex+Age+Lrn), quine, NegativeBinomial(2.0), LogLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Days ~ 1 + Eth + Sex + Age + Lrn @@ -158,7 +158,7 @@ Lrn: SL 0.296768 0.185934 1.60 0.1105 -0.0676559 0.661191 ──────────────────────────────────────────────────────────────────────────── julia> nbrmodel = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink()) -StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{GeneralizedLinearModel{GLM.GlmResp{Vector{Float64}, NegativeBinomial{Float64}, LogLink, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.Cholesky{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} Days ~ 1 + Eth + Sex + Age + Lrn @@ -196,8 +196,8 @@ julia> using GLM, RDatasets julia> form = dataset("datasets", "Formaldehyde") 6×2 DataFrame - Row │ Carb OptDen - │ Float64 Float64 + Row │ Carb OptDen + │ Float64 Float64 ─────┼────────────────── 1 │ 0.1 0.086 2 │ 0.3 0.269 @@ -207,7 +207,7 @@ julia> form = dataset("datasets", "Formaldehyde") 6 │ 0.9 
0.782 julia> lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} OptDen ~ 1 + Carb @@ -256,7 +256,7 @@ julia> LifeCycleSavings = dataset("datasets", "LifeCycleSavings") 35 rows omitted julia> fm2 = fit(LinearModel, @formula(SR ~ Pop15 + Pop75 + DPI + DDPI), LifeCycleSavings) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} SR ~ 1 + Pop15 + Pop75 + DPI + DDPI @@ -350,8 +350,8 @@ julia> dobson = DataFrame(Counts = [18.,17,15,20,10,21,25,13,13], Outcome = categorical([1,2,3,1,2,3,1,2,3]), Treatment = categorical([1,1,1,2,2,2,3,3,3])) 9×3 DataFrame - Row │ Counts Outcome Treatment - │ Float64 Cat… Cat… + Row │ Counts Outcome Treatment + │ Float64 Cat… Cat… ─────┼───────────────────────────── 1 │ 18.0 1 1 2 │ 17.0 2 1 @@ -390,29 +390,8 @@ In this example, we choose the best model from a set of λs, based on minimum BI ```jldoctest julia> using GLM, RDatasets, StatsBase, DataFrames, Optim -julia> trees = DataFrame(dataset("datasets", "trees")) -31×3 DataFrame - Row │ Girth Height Volume - │ Float64 Int64 Float64 -─────┼────────────────────────── - 1 │ 8.3 70 10.3 - 2 │ 8.6 65 10.3 - 3 │ 8.8 63 10.2 - 4 │ 10.5 72 16.4 - 5 │ 
10.7 81 18.8 - 6 │ 10.8 83 19.7 - 7 │ 11.0 66 15.6 - 8 │ 11.0 75 18.2 - ⋮ │ ⋮ ⋮ ⋮ - 25 │ 16.3 77 42.6 - 26 │ 17.3 81 55.4 - 27 │ 17.5 82 55.7 - 28 │ 17.9 80 58.3 - 29 │ 18.0 80 51.5 - 30 │ 18.0 80 51.0 - 31 │ 20.6 87 77.0 - 16 rows omitted - +julia> trees = DataFrame(dataset("datasets", "trees")); + julia> bic_glm(λ) = bic(glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(λ))); julia> optimal_bic = optimize(bic_glm, -1.0, 1.0); diff --git a/docs/src/index.md b/docs/src/index.md index 9a6309bb..f991158b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -85,7 +85,7 @@ julia> data = DataFrame(y = rand(rng, 100), x = categorical(repeat([1, 2, 3, 4], julia> lm(@formula(y ~ x), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -108,7 +108,7 @@ julia> using StableRNGs julia> data = DataFrame(y = rand(StableRNG(1), 100), x = repeat([1, 2, 3, 4], 25)); julia> lm(@formula(y ~ x), data, contrasts = Dict(:x => DummyCoding())) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -125,22 +125,23 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ## Weighting -Both `lm` and `glm` allow 
weighted estimation. The three different [types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: +Both `lm` and `glm` allow weighted estimation. The three different [types of weights](https://juliastats.org/jl/stable/weights/) defined in [jl](https://github.com/JuliaStats/jl) can be used to fit a model: - `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for each observation. These weights may also be referred to as reliability weights, precision weights or inverse variance weights. These are typically used when the observations being weighted are aggregate values (e.g., averages) with differing variances. - `FrequencyWeights` describe the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups. These weights may also be referred to as sampling weights. -- `ProbabilityWeights` describe how trhe sample can be scaled back to the population. Ususally are the reciprocals of sampling probabilities. +- `ProbabilityWeights` describe how the sample can be scaled back to the population. Usually are the reciprocals of sampling probabilities. -We illustrate the API with random generated data. +The `AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights` can be constructed using `aweights`, `fweights`, and `pweights`, respectively. + +We illustrate the API with randomly generated data. 
```jldoctest weights julia> using StableRNGs, DataFrames, GLM julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25), ); - julia> m = lm(@formula(y ~ x), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -153,7 +154,7 @@ x -0.0500249 0.0307201 -1.63 0.1066 -0.110988 0.0109382 ────────────────────────────────────────────────────────────────────────── julia> m_aweights = lm(@formula(y ~ x), data, wts=aweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, StatsBase.AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x @@ -166,7 +167,7 @@ x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 ────────────────────────────────────────────────────────────────────────── julia> m_fweights = lm(@formula(y ~ x), data, wts=fweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, StatsBase.FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, 
StatsBase.FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x @@ -179,20 +180,24 @@ x -0.0478667 0.0193863 -2.47 0.0142 -0.0860494 -0.00968394 ───────────────────────────────────────────────────────────────────────────── julia> m_pweights = lm(@formula(y ~ x), data, wts=pweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x Coefficients: -────────────────────────────────────────────────────────────────────────── - Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% -────────────────────────────────────────────────────────────────────────── -(Intercept) 0.51673 0.0270707 19.09 <1e-34 0.463009 0.570451 -x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 -────────────────────────────────────────────────────────────────────────── +─────────────────────────────────────────────────────────────────────────── + Coef. Std. 
Error t Pr(>|t|) Lower 95% Upper 95% +─────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0288654 17.90 <1e-31 0.459447 0.574012 +x -0.0478667 0.0266884 -1.79 0.0760 -0.100829 0.00509556 +─────────────────────────────────────────────────────────────────────────── ``` -The type of the weights selected will affect the variance of the estimated coefficients and calculations involving the variance such as `likelihood`, `deviance`, `nulllikelihood`, `nulldeviance`. The fit of the model is the same regardless of the type of weights. +!!! warning + + In the old API, weights were passed as `AbstractVectors` and were silently treated in the internal computation of standard errors and related quantities as `FrequencyWeights`. Passing weights as `AbstractVector` is still allowed for backward compatibility, but it is deprecated. When weights are passed following the old API, they are now coerced to `FrequencyWeights` and a deprecation warning is issued. + +The specific type of the weights selected will affect the variance of the estimated coefficients and, for `FrequencyWeights`, calculations involving the variance such as `likelihood`, `deviance`, `nulllikelihood`, `nulldeviance`. The fit of the model is the same regardless of the type of weights. ```jldoctest weights julia> loglikelihood(m_aweights) @@ -205,6 +210,10 @@ julia> loglikelihood(m_pweights) -16.29630756138424 ``` +!!! note + + Note the R functions for fitting Linear and Generalized Linear models, `lm` and `glm`, accept a `weights` keyword argument. Both functions and related methods, such as `summary`, return standard errors assuming that the weights are analytic. 
+ ## Comparing models with F-test Comparisons between two or more linear models can be performed using the `ftest` function, From 8b0e8e18fe4c43b583fc191c8570cfa14bc14f17 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 18:13:35 +0200 Subject: [PATCH 037/106] Make cooksdistance work with rank deficient design --- src/lm.jl | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/lm.jl b/src/lm.jl index c3226519..bd0da105 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -350,10 +350,19 @@ function StatsBase.cooksdistance(obj::LinearModel) mse = GLM.dispersion(obj,true) k = dof(obj)-1 d_res = dof_residual(obj) - X = modelmatrix(obj; weighted=isweighted(obj)) - XtX = crossmodelmatrix(obj; weighted=isweighted(obj)) - k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) - hii = diag(X * inv(XtX) * X') - D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) + X = modelmatrix(obj; weighted=isweighted(obj)) + if k == size(X,2) + XtX = crossmodelmatrix(obj; weighted=isweighted(obj)) + hii = diag(X * inv(XtX) * X') + D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) + else + pp = obj.pp + C = invchol(pp) + idx_nan, idx_non = nancolidx(C) + Xc = view(X, :, idx_non) + XtX = (Xc)'*Xc + hii = diag(Xc * inv(XtX) * Xc') + D = @.
u^2 * (hii / (1 - hii)^2) / (k*mse) + end return D end From f609f0631d3a760f5ea2926e97b5daf5914179ea Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 7 Sep 2022 18:21:24 +0200 Subject: [PATCH 038/106] Test cooksdistance with rank deficient design --- test/runtests.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 9cd09495..503a107f 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -77,8 +77,9 @@ end # linear regression, two full collinear variables (XC = 2 XA) hence should get the same results as the original # after pivoting t_lm_colli = lm(@formula(Y ~ XA + XC), st_df, dropcollinear=true) - # Currently fails as the collinear variable is not dropped from `modelmatrix(obj)` - @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli)) + t_lm_colli_b = lm(@formula(Y ~ XC), st_df, dropcollinear=true) + @test isapprox(cooksdistance(t_lm_colli), cooksdistance(t_lm_colli_b)) + end From 23f3d0377741dc3e36ceaf4bd467b2e0c2cf063e Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 8 Sep 2022 14:35:13 +0200 Subject: [PATCH 039/106] Fix CholeskyPivoted signature in docs --- docs/src/api.md | 4 ++-- docs/src/examples.md | 6 +++--- docs/src/index.md | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 8ba6328d..c94914c2 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -25,7 +25,7 @@ The most general approach to fitting a model is with the `fit` function, as in julia> using GLM, StableRNGs julia> fit(LinearModel, hcat(ones(10), 1:10), randn(StableRNG(12321), 10)) -LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}: +LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, 
UnitWeights{Int64}}}: Coefficients: ──────────────────────────────────────────────────────────────── @@ -41,7 +41,7 @@ This model can also be fit as julia> using GLM, StableRNGs julia> lm(hcat(ones(10), 1:10), randn(StableRNG(12321), 10)) -LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}: +LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}: Coefficients: ──────────────────────────────────────────────────────────────── diff --git a/docs/src/examples.md b/docs/src/examples.md index fec181ff..0b76ff3b 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -20,7 +20,7 @@ julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]) 3 │ 3 7 julia> ols = lm(@formula(Y ~ X), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} Y ~ 1 + X @@ -207,7 +207,7 @@ julia> form = dataset("datasets", "Formaldehyde") 6 │ 0.9 0.782 julia> lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} OptDen ~ 1 + Carb 
@@ -256,7 +256,7 @@ julia> LifeCycleSavings = dataset("datasets", "LifeCycleSavings") 35 rows omitted julia> fm2 = fit(LinearModel, @formula(SR ~ Pop15 + Pop75 + DPI + DDPI), LifeCycleSavings) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} SR ~ 1 + Pop15 + Pop75 + DPI + DDPI diff --git a/docs/src/index.md b/docs/src/index.md index f991158b..c3fec83a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -85,7 +85,7 @@ julia> data = DataFrame(y = rand(rng, 100), x = categorical(repeat([1, 2, 3, 4], julia> lm(@formula(y ~ x), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -108,7 +108,7 @@ julia> using StableRNGs julia> data = DataFrame(y = rand(StableRNG(1), 100), x = repeat([1, 2, 3, 4], 25)); julia> lm(@formula(y ~ x), data, contrasts = Dict(:x => DummyCoding())) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, 
Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -141,7 +141,7 @@ julia> using StableRNGs, DataFrames, GLM julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25), ); julia> m = lm(@formula(y ~ x), data) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, UnitWeights{Int64}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, UnitWeights{Int64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, UnitWeights{Int64}}}, Matrix{Float64}} y ~ 1 + x @@ -154,7 +154,7 @@ x -0.0500249 0.0307201 -1.63 0.1066 -0.110988 0.0109382 ────────────────────────────────────────────────────────────────────────── julia> m_aweights = lm(@formula(y ~ x), data, wts=aweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, AnalyticWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, AnalyticWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x From 2749b84d3c922c1e22959e11ed6caaf5eda5cb6c Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 8 Sep 2022 14:35:43 +0200 Subject: [PATCH 040/106] Make nancolidx v1.0 and v1.1 friendly --- src/linpred.jl | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 9ca1ffa7..b3afbfdf 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -304,10 +304,19 @@ function _vcov(pp::DensePredChol{T, 
<:Cholesky, <:ProbabilityWeights}, u::Abstra end function nancolidx(A::AbstractMatrix) - ## Return the columns without missing values - allnanidx = findall(map(x->all(isnan.(x)), eachcol(A))) - nonnanidx = setdiff(axes(A, 2), allnanidx) - return (allnanidx, nonnanidx) + ## Return to set the idx: + ## idx_nancol idx_nonnancol + nrow, ncol = size(A) + idx_nancol = Int64[] + idx_nonnancol = Int64[] + for j in axes(A, 2) + h = 0 + for i in axes(A, 1) + h += isnan(A[i,j]) ? 1 : 0 + end + h == nrow ? push!(idx_nancol, j) : push!(idx_nonnancol, j) + end + return (idx_nancol, idx_nonnancol) end function _vcov(pp::DensePredChol{T, <:CholeskyPivoted, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} From 82e472bdebdfdb8bbe897f297d905bde6f46e35d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 9 Sep 2022 10:48:01 +0200 Subject: [PATCH 041/106] Fix signatures --- docs/src/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index c3fec83a..7d853f52 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -167,7 +167,7 @@ x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 ────────────────────────────────────────────────────────────────────────── julia> m_fweights = lm(@formula(y ~ x), data, wts=fweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, FrequencyWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, FrequencyWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x @@ -180,7 +180,7 @@ x -0.0478667 0.0193863 -2.47 0.0142 -0.0860494 -0.00968394 
───────────────────────────────────────────────────────────────────────────── julia> m_pweights = lm(@formula(y ~ x), data, wts=pweights(data.weights)) -StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} +StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}, ProbabilityWeights{Int64, Int64, Vector{Int64}}}}, Matrix{Float64}} y ~ 1 + x From 2d6aaed2adf5424dab9358561f221b6300b2f022 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 9 Sep 2022 12:37:39 +0200 Subject: [PATCH 042/106] Correct implementation of momentmatrix --- src/lm.jl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lm.jl b/src/lm.jl index bd0da105..0ec1cce7 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -318,11 +318,10 @@ function confint(obj::LinearModel; level::Real=0.95) quantile(TDist(dof_residual(obj)), (1. - level)/2.) * [1. -1.] 
end - -function momentmatrix(m::LinearModel) +function momentmatrix(m::LinearModel; weighted=isweighted(m)) X = modelmatrix(m; weighted=false) r = residuals(m; weighted=false) - if isweighted(m) + if weighted & isweighted(m) return X .* r .* weights(m) else return X .* r From dbc9ae999d0dd43af023e5b182014fdef7afa41f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 9 Sep 2022 12:37:48 +0200 Subject: [PATCH 043/106] Test moment matrix --- test/runtests.jl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 503a107f..b43f6d80 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -65,6 +65,7 @@ end # linear regression t_lm_base = lm(@formula(Y ~ XA), st_df) @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base)) + @test GLM.momentmatrix(t_lm_base) == modelmatrix(t_lm_base).*residuals(t_lm_base) # linear regression, no intercept t_lm_noint = lm(@formula(Y ~ XA +0), st_df) @@ -79,6 +80,7 @@ end t_lm_colli = lm(@formula(Y ~ XA + XC), st_df, dropcollinear=true) t_lm_colli_b = lm(@formula(Y ~ XC), st_df, dropcollinear=true) @test isapprox(cooksdistance(t_lm_colli), cooksdistance(t_lm_colli_b)) + end From e0d9cdf0d04da80d9bddc1e1998e64c9de57c7c5 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 23 Sep 2022 09:37:20 +0200 Subject: [PATCH 044/106] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 4 ++-- src/GLM.jl | 22 +++++++++++----------- src/glmfit.jl | 12 ++---------- src/linpred.jl | 9 ++++----- src/lm.jl | 10 +++++----- test/runtests.jl | 2 +- 6 files changed, 25 insertions(+), 34 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 7d853f52..b3e9295b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -125,13 +125,13 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ## Weighting -Both `lm` and `glm` allow weighted estimation. 
The three different [types of weights](https://juliastats.org/jl/stable/weights/) defined in [jl](https://github.com/JuliaStats/jl) can be used to fit a model: +Both `lm` and `glm` allow weighted estimation. The three different [types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: - `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for each observation. These weights may also be referred to as reliability weights, precision weights or inverse variance weights. These are typically used when the observations being weighted are aggregate values (e.g., averages) with differing variances. - `FrequencyWeights` describe the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups. These weights may also be referred to as sampling weights. - `ProbabilityWeights` describe how the sample can be scaled back to the population. Usually are the reciprocals of sampling probabilities. -The `AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights` can be constructed using `aweights`, `fweights`, and `pweights`, respectively. +To indicate which kind of weights should be used, the vector of weights must be wrapped in one of the three weights types, and then passed to the `weights` keyword argument. Short-hand functions `aweights`, `fweights`, and `pweights` can be used to construct `AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights`, respectively. We illustrate the API with randomly generated data. 
diff --git a/src/GLM.jl b/src/GLM.jl index e5f4b284..ea6b1390 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -53,17 +53,17 @@ module GLM LinearModel, # functions - canonicallink, # canonical link function for a distribution - deviance, # deviance of fitted and observed responses - devresid, # vector of squared deviance residuals - formula, # extract the formula from a model - glm, # general interface - linpred, # linear predictor - lm, # linear model - negbin, # interface to fitting negative binomial regression - nobs, # total number of observations - predict, # make predictions - ftest # compare models with an F test + canonicallink, # canonical link function for a distribution + deviance, # deviance of fitted and observed responses + devresid, # vector of squared deviance residuals + formula, # extract the formula from a model + glm, # general interface + linpred, # linear predictor + lm, # linear model + negbin, # interface to fitting negative binomial regression + nobs, # total number of observations + predict, # make predictions + ftest # compare models with an F test const FP = AbstractFloat const FPVector{T<:FP} = AbstractArray{T,1} diff --git a/src/glmfit.jl b/src/glmfit.jl index 934f6a75..46b07772 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -102,7 +102,7 @@ function updateμ! end function updateμ!(r::GlmResp{T,D,L,<:AbstractWeights}, linPr::T) where {T<:FPVector,D,L} isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) updateμ!(r) - if !(weights(r) isa UnitWeights) + if isweighted(r) map!(*, r.devresid, r.devresid, r.wts) map!(*, r.wrkwt, r.wrkwt, r.wts) end @@ -270,7 +270,7 @@ function nulldeviance(m::GeneralizedLinearModel) offset = r.offset hasint = hasintercept(m) dev = zero(eltype(y)) - if isempty(offset) # Faster method + if isempty(offset) # Faster method if isweighted(m) mu = hasint ? 
mean(y, wts) : @@ -297,7 +297,6 @@ end loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) - function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} y = r.y mu = r.mu @@ -480,7 +479,6 @@ function StatsBase.fit!(m::AbstractGLM, rtol::Real=1e-6, start=nothing, kwargs...) - if haskey(kwargs, :maxIter) Base.depwarn("'maxIter' argument is deprecated, use 'maxiter' instead", :fit!) maxiter = kwargs[:maxIter] @@ -776,11 +774,6 @@ end ## To be removed once is merged momentmatrix(m::RegressionModel) = momentmatrix(m.model) -""" - momentmatrix(m::GeneralizedLinearModel) - - Return the moment matrix (score equation) of a GLM model. -""" function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) d = variancestructure(m.rr) @@ -791,4 +784,3 @@ end variancestructure(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}) = 1 variancestructure(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}) = sum(abs2, Base.Broadcast.broadcasted(*, rr.wrkwt, rr.wrkresid))/sum(rr.wrkwt) - diff --git a/src/linpred.jl b/src/linpred.jl index b3afbfdf..04af8c60 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -52,7 +52,7 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights} <: DensePred scratchbeta::Vector{T} qr::QRCompactWY{T} wts::W - function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights} + function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights} n, p = size(X) length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)")) length(wts) == n || throw(DimensionMismatch("Length of weights does not match the dimension of X")) @@ -61,7 +61,6 @@ mutable struct DensePredQR{T<:BlasReal, W<:AbstractWeights} <: DensePred end end DensePredQR{T}(X::Matrix) where T = DensePredQR{eltype(X)}(X, zeros(T, size(X, 2)), uweights(size(X,1))) -#DensePredQR(X::Matrix{T}, wts::AbstractWeights) where T = DensePredQR{T}(X, zeros(T, 
size(X,2)), wts) convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X) """ @@ -290,7 +289,7 @@ function vcov(x::LinPredModel) _vcov(x.pp, u, d) end -_vcov(pp::LinPred, u, d) = rmul!(invchol(pp), d) +_vcov(pp::LinPred, u::AbstractVector, d::Real) = rmul!(invchol(pp), d) function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts @@ -337,8 +336,8 @@ function _vcov(pp::DensePredChol{T, <:CholeskyPivoted, <:ProbabilityWeights}, u: A = view(invXtWX, idx_non, idx_non) V = similar(pp.scratchm2) V[idx_non, idx_non] = A*B*A - V[idx_nan, :] .= convert(T, NaN) - V[:, idx_nan] .= convert(T, NaN) + V[idx_nan, :] .= NaN + V[:, idx_nan] .= NaN end n = length(wts) n/(n-rnk)*V diff --git a/src/lm.jl b/src/lm.jl index 0ec1cce7..c1a563c5 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -66,8 +66,8 @@ end weights(r::LmResp) = r.wts -nobs(r::LmResp{V,W}) where {V<:AbstractVector{T} where T<:AbstractFloat,W<:FrequencyWeights} = sum(r.wts) -nobs(r::LmResp{V,W}) where {V<:AbstractVector{T} where T<:AbstractFloat,W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::LmResp{<:Any,W}) where {W<:FrequencyWeights} = sum(r.wts) +nobs(r::LmResp) = oftype(sum(one(eltype(r.wts))), length(r.y)) function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T n = nobs(r) @@ -76,7 +76,7 @@ end function loglikelihood(r::LmResp{T,<:AbstractWeights}) where T N = length(r.y) - n = sum(log.(weights(r))) + n = sum(log, weights(r)) 0.5*(n - N * (log(2π * deviance(r)/N) + 1)) end @@ -230,7 +230,7 @@ function nullloglikelihood(m::LinearModel) -n/2 * (log(2π * nulldeviance(m)/n) + 1) else N = length(m.rr.y) - n = sum(log.(wts)) + n = sum(log, wts) 0.5*(n - N * (log(2π * nulldeviance(m)/N) + 1)) end end @@ -321,7 +321,7 @@ end function momentmatrix(m::LinearModel; weighted=isweighted(m)) X = modelmatrix(m; weighted=false) r = residuals(m; weighted=false) - if weighted & isweighted(m) + if 
weighted && isweighted(m) return X .* r .* weights(m) else return X .* r diff --git a/test/runtests.jl b/test/runtests.jl index b43f6d80..80e40759 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1329,7 +1329,7 @@ end end @testset "Issue 118" begin - Test.@inferred nobs(lm(randn(10, 2), randn(10))) + @inferred nobs(lm(randn(10, 2), randn(10))) end @testset "Issue 153" begin From 46e8f92894c952ec9996e75cd498e0e4e76063bb Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 23 Sep 2022 20:18:18 +0200 Subject: [PATCH 045/106] Incorporate suggestions of reviewer --- docs/src/index.md | 47 ++++++++++++++++++---------- src/glmfit.jl | 42 +++++++++++++------------ src/linpred.jl | 79 ++++++++++------------------------------------- src/lm.jl | 34 ++++++++++---------- 4 files changed, 86 insertions(+), 116 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index b3e9295b..9a1137bc 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -125,13 +125,24 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ## Weighting -Both `lm` and `glm` allow weighted estimation. The three different [types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: - -- `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for each observation. These weights may also be referred to as reliability weights, precision weights or inverse variance weights. These are typically used when the observations being weighted are aggregate values (e.g., averages) with differing variances. -- `FrequencyWeights` describe the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups. These weights may also be referred to as sampling weights. -- `ProbabilityWeights` describe how the sample can be scaled back to the population. 
Usually are the reciprocals of sampling probabilities. - -To indicate which kind of weights should be used, the vector of weights must be wrapped in one of the three weights types, and then passed to the `weights` keyword argument. Short-hand functions `aweights`, `fweights`, and `pweights` can be used to construct `AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights`, respectively. +Both `lm` and `glm` allow weighted estimation. The three different +[types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in +[StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: + +- `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for + each observation. These weights may also be referred to as reliability weights, precision + weights or inverse variance weights. These are typically used when the observations being + weighted are aggregate values (e.g., averages) with differing variances. +- `FrequencyWeights` describe the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. + These weights may also be referred to as sampling weights. +- `ProbabilityWeights` describe how the sample can be scaled back to the population. + Usually are the reciprocals of sampling probabilities. + +To indicate which kind of weights should be used, the vector of weights must be wrapped in +one of the three weights types, and then passed to the `weights` keyword argument. +Short-hand functions `aweights`, `fweights`, and `pweights` can be used to construct +`AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights`, respectively. We illustrate the API with randomly generated data. @@ -195,9 +206,15 @@ x -0.0478667 0.0266884 -1.79 0.0760 -0.100829 0.00509556 !!! 
warning - In the old API, weights were passed as `AbstractVectors` and were silently treated in the internal computation of standard errors and related quantities as `FrequencyWeights`. Passing weights as `AbstractVector` is still allowed for backward compatibility, but it is deprecated. When weights are passed following the old API, they are now coerced to `FrequencyWeights` and a deprecation warning is issued. + In the old API, weights were passed as `AbstractVectors` and were silently treated in + the internal computation of standard errors and related quantities as `FrequencyWeights`. + Passing weights as `AbstractVector` is still allowed for backward compatibility, but it + is deprecated. When weights are passed following the old API, they are now coerced to + `FrequencyWeights` and a deprecation warning is issued. -The specific type of the weights selected will affect the variance of the estimated coefficients and, for `FrequencyWeights`, calculations involving the variance such as `likelihood`, `deviance`, `nulllikelihood`, `nulldeviance`. The fit of the model is the same regardless of the type of weights. +The type of the weights will affect the variance of the estimated coefficients and the +quantities involving this variance. The coefficient point estimates will be the same +regardless of the type of weights. ```jldoctest weights julia> loglikelihood(m_aweights) @@ -210,10 +227,6 @@ julia> loglikelihood(m_pweights) -16.29630756138424 ``` -!!! note - - Note the R functions for fitting Linear and Generalized Linear models, `lm` and `glm`, accept a `weights` keyword argument. Both functions and related methods, such as `summary`, return standard errors assuming that the weights are analytic. 
- ## Comparing models with F-test Comparisons between two or more linear models can be performed using the `ftest` function, @@ -267,8 +280,8 @@ Many of the methods provided by this package have names similar to those in [R]( - `vcov`: variance-covariance matrix of the coefficient estimates -Note that the canonical link for negative binomial regression is `NegativeBinomialLink`, but -in practice one typically uses `LogLink`. +Note that the canonical link for negative binomial regression is `NegativeBinomialLink`, +but in practice one typically uses `LogLink`. ```jldoctest methods julia> using GLM, DataFrames, StatsBase @@ -300,7 +313,9 @@ julia> round.(predict(mdl, test_data); digits=8) 9.33333333 ``` -The [`cooksdistance`](@ref) method computes [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation used to fit a linear model, giving an estimate of the influence of each data point. +The [`cooksdistance`](@ref) method computes +[Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation +used to fit a linear model, giving an estimate of the influence of each data point. Note that it's currently only implemented for linear models without weights. ```jldoctest methods diff --git a/src/glmfit.jl b/src/glmfit.jl index 46b07772..9acff144 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -36,8 +36,8 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec checky(y, d) ## We don't support custom types of weights that a user may define - if !(wts isa AbstractWeights) - throw(ArgumentError("`wts` should be an AbstractWeights but was $W")) + if !(wts isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights, UnitWeights}) + throw(ArgumentError("The type of `wts` was $W. 
The supported weights type are `FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights`, or a `UnitWeights`.")) end # Lengths of y, η, and η all need to be n @@ -75,6 +75,7 @@ GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, deviance(r::GlmResp) = sum(r.devresid) weights(r::GlmResp) = r.wts +isweighted(r::GlmResp) = weights(r) isa Union{AnalyticWeights, FrequencyWeights, ProbabilityWeights} """ cancancel(r::GlmResp{V,D,L}) @@ -469,7 +470,7 @@ end function StatsBase.fit!(m::AbstractGLM, y; - wts=nothing, + wts=uweights(length(y)), offset=nothing, dofit::Bool=true, verbose::Bool=false, @@ -499,6 +500,9 @@ function StatsBase.fit!(m::AbstractGLM, rtol = kwargs[:tol] end + r = m.rr + V = typeof(r.y) + r.y = copy!(r.y, y) isa(offset, Nothing) || copy!(r.offset, offset) initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset) updateμ!(r, r.eta) @@ -718,20 +722,19 @@ function initialeta!(eta::AbstractVector, end function _initialeta!(eta, dist, link, y, wts::UnitWeights) - @inbounds @simd for i in eachindex(y, eta) - μ = mustart(dist, y[i], 1) - eta[i] = linkfun(link, μ) - end -end - -function _initialeta!(eta, dist, link, y, wts::AbstractWeights) - @inbounds @simd for i in eachindex(y, eta) - μ = mustart(dist, y[i], wts[i]) - eta[i] = linkfun(link, μ) + if wts isa UnitWeights + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], 1) + eta[i] = linkfun(link, μ) + end + else + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], wts[i]) + eta[i] = linkfun(link, μ) + end end end - # Helper function to check that the values of y are in the allowed domain function checky(y, d::Distribution) if any(x -> !insupport(d, x), y) @@ -754,7 +757,8 @@ nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts) function residuals(r::GlmResp; weighted::Bool=false) y, η, μ = r.y, r.eta, r.mu - dres = similar(μ) + dres = similar(μ) + @inbounds for i in eachindex(y, μ) μi = μ[i] @@ -776,11 +780,11 @@ 
momentmatrix(m::RegressionModel) = momentmatrix(m.model) function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) - d = variancestructure(m.rr) r = m.rr.wrkwt .* m.rr.wrkresid + d = variancestructure(m.rr, r) return (X .* r) ./ d end -variancestructure(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}) = 1 -variancestructure(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}) = - sum(abs2, Base.Broadcast.broadcasted(*, rr.wrkwt, rr.wrkresid))/sum(rr.wrkwt) +variancestructure(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, r) = 1 +variancestructure(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}, r) = + sum(abs2, r)/sum(rr.wrkwt) diff --git a/src/linpred.jl b/src/linpred.jl index 04af8c60..44cc2cc5 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -103,7 +103,11 @@ end function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) scr = similar(X) - mul!(scr, Diagonal(wts), X) + if wts isa UnitWeights + copy!(scr, X) + else + mul!(scr, Diagonal(wts), X) + end F = Hermitian(float(scr'X)) T = eltype(F) @@ -118,20 +122,6 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) similar(cholfactors(F))) end -function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::UnitWeights) - F = Hermitian(float(X'X)) - T = eltype(F) - F = pivot ? 
pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) - DensePredChol(Matrix{T}(X), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - zeros(T, size(X, 2)), - F, - wts, - similar(X, T), - similar(cholfactors(F))) -end - cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights=uweights(size(X,1))) = DensePredChol(X, pivot, wts) @@ -145,37 +135,15 @@ function cholesky(p::DensePredChol{T}) where T<:FP end cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) -function delbeta!(p::DensePredChol{T,<:Cholesky, <:UnitWeights}, r::Vector{T}) where T<:BlasReal - ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r)) - p -end - -function delbeta!(p::DensePredChol{T,<:Cholesky, <:AbstractWeights}, r::Vector{T}) where T<:BlasReal - X = mul!(p.scratchm1, Diagonal(p.wts), p.X) +function delbeta!(p::DensePredChol{T,<:Cholesky,W<:AbstractWeights}, r::Vector{T}) where T<:BlasReal + X = W isa UnitWeights ? copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal - ch = p.chol - delbeta = mul!(p.delbeta, adjoint(p.X), r) - rnk = rank(ch) - if rnk == length(delbeta) - ldiv!(ch, delbeta) - else - permute!(delbeta, ch.p) - for k=(rnk+1):length(delbeta) - delbeta[k] = -zero(T) - end - LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk)) - invpermute!(delbeta, ch.p) - end - p -end - -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,W<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol - X = mul!(p.scratchm1, Diagonal(p.wts), p.X) + X = W isa UnitWeights ? 
copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) delbeta = mul!(p.delbeta, adjoint(X), r) rnk = rank(ch) if rnk == length(delbeta) @@ -302,22 +270,6 @@ function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::Abstra n/(n-k)*V end -function nancolidx(A::AbstractMatrix) - ## Return to set the idx: - ## idx_nancol idx_nonnancol - nrow, ncol = size(A) - idx_nancol = Int64[] - idx_nonnancol = Int64[] - for j in axes(A, 2) - h = 0 - for i in axes(A, 1) - h += isnan(A[i,j]) ? 1 : 0 - end - h == nrow ? push!(idx_nancol, j) : push!(idx_nonnancol, j) - end - return (idx_nancol, idx_nonnancol) -end - function _vcov(pp::DensePredChol{T, <:CholeskyPivoted, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) @@ -330,14 +282,15 @@ function _vcov(pp::DensePredChol{T, <:CholeskyPivoted, <:ProbabilityWeights}, u: A = invXtWX V = A*B*A else - idx_nan, idx_non = nancolidx(invXtWX) - Zc = view(Z, :, idx_non) + nancols = [all(isnan, col) for col in eachcol(invXtWX)] + nnancols = .!nancols + Zc = view(Z, :, nnancols) B = Zc'Zc - A = view(invXtWX, idx_non, idx_non) + A = view(invXtWX, nnancols, nnancols) V = similar(pp.scratchm2) - V[idx_non, idx_non] = A*B*A - V[idx_nan, :] .= NaN - V[:, idx_nan] .= NaN + V[nnancols, nnancols] = A*B*A + V[nancols, :] .= NaN + V[:, nancols] .= NaN end n = length(wts) n/(n-rnk)*V diff --git a/src/lm.jl b/src/lm.jl index c1a563c5..3e8ee036 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -43,23 +43,20 @@ end updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr))) -function deviance(r::LmResp{T,<:UnitWeights}) where T - y = r.y - mu = r.mu - v = zero(eltype(y)) + zero(eltype(y)) - @inbounds @simd for i in eachindex(y,mu) - v += abs2(y[i] - mu[i]) - end - return v -end - function deviance(r::LmResp{T,<:AbstractWeights}) where T y = r.y mu = r.mu - wts = r.wts - v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) - 
@inbounds @simd for i in eachindex(y,mu,wts) - v += abs2(y[i] - mu[i])*wts[i] + wts = r.wts + if wts isa UnitWeights + v = zero(eltype(y)) + zero(eltype(y)) + @inbounds @simd for i in eachindex(y,mu,wts) + v += abs2(y[i] - mu[i]) + end + else + v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) + @inbounds @simd for i in eachindex(y,mu,wts) + v += abs2(y[i] - mu[i])*wts[i] + end end return v end @@ -88,7 +85,7 @@ function residuals(r::LmResp; weighted=false) elseif r.wts isa AbstractWeights sqrt.(wts).*res else - throw(ArgumentError("`weighted=true` allowed only for weighted models.")) + res end end @@ -355,10 +352,11 @@ function StatsBase.cooksdistance(obj::LinearModel) hii = diag(X * inv(XtX) * X') D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) else - pp = obj.pp + pp = obj.pp C = invchol(pp) - idx_nan, idx_non = nancolidx(C) - Xc = view(X, :, idx_non) + nancols = [all(isnan, col) for col in eachcol(C)] + nnancols = .!nancols + Xc = view(X, :, nnancols) XtX = (Xc)'*Xc hii = diag(Xc * inv(XtX) * Xc') D = @. 
u^2 * (hii / (1 - hii)^2) / (k*mse) From 6df401be072d8decb47dba091485be443c158969 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 24 Sep 2022 18:34:50 +0200 Subject: [PATCH 046/106] Deals with review comments --- src/glmfit.jl | 10 +++++----- src/linpred.jl | 28 +++++++++++++++------------- src/lm.jl | 6 +++--- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 9acff144..aa76d503 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -309,7 +309,7 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} N = length(y) δ = deviance(r) ϕ = δ/n - if wts isa FrequencyWeights || wts isa UnitWeights + if wts isa Union{FrequencyWeights, UnitWeights} @inbounds for i in eachindex(y, mu) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end @@ -335,7 +335,7 @@ function nullloglikelihood(m::GeneralizedLinearModel) δ = nulldeviance(m) ϕ = nulldeviance(m)/nobs(m) N = length(y) - if wts isa FrequencyWeights || wts isa UnitWeights + if wts isa Union{FrequencyWeights, UnitWeights} @inbounds for i in eachindex(y, wts) ll += loglik_obs(d, y[i], mu, wts[i], ϕ) end @@ -573,9 +573,9 @@ function fit(::Type{M}, throw(DimensionMismatch("number of rows in X and y must match")) end # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights - _wts = if isa(wts, AbstractWeights) + _wts = if wts isa AbstractWeights wts - elseif isa(wts, AbstractVector) + elseif wts isa AbstractVector Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. 
Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) @@ -721,7 +721,7 @@ function initialeta!(eta::AbstractVector, return eta end -function _initialeta!(eta, dist, link, y, wts::UnitWeights) +function _initialeta!(eta, dist, link, y, wts::AbstractWeights) if wts isa UnitWeights @inbounds @simd for i in eachindex(y, eta) μ = mustart(dist, y[i], 1) diff --git a/src/linpred.jl b/src/linpred.jl index 44cc2cc5..341088f1 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -101,16 +101,18 @@ mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractVector} <: DensePred scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) - scr = similar(X) - if wts isa UnitWeights - copy!(scr, X) - else +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) + if wts isa UnitWeights + F = Hermitian(float(X'X)) + T = eltype(F) + scr = similar(X, T) + else + T = promote_type(eltype(wts), eltype(X)) + T = promote_type(eltype(float(zero(T))), T) + scr = similar(X, T) mul!(scr, Diagonal(wts), X) - end - - F = Hermitian(float(scr'X)) - T = eltype(F) + F = Hermitian(float(scr'X)) + end F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), zeros(T, size(X, 2)), @@ -135,15 +137,15 @@ function cholesky(p::DensePredChol{T}) where T<:FP end cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) -function delbeta!(p::DensePredChol{T,<:Cholesky,W<:AbstractWeights}, r::Vector{T}) where T<:BlasReal - X = W isa UnitWeights ? copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal + X = p.wts isa UnitWeights ? 
copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,W<:AbstractWeights}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol - X = W isa UnitWeights ? copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) + X = p.wts isa UnitWeights ? copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) delbeta = mul!(p.delbeta, adjoint(X), r) rnk = rank(ch) if rnk == length(delbeta) diff --git a/src/lm.jl b/src/lm.jl index 3e8ee036..c96b8567 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -64,7 +64,7 @@ end weights(r::LmResp) = r.wts nobs(r::LmResp{<:Any,W}) where {W<:FrequencyWeights} = sum(r.wts) -nobs(r::LmResp) = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::LmResp{<:Any,W}) where {W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T n = nobs(r) @@ -158,9 +158,9 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< dropcollinear = allowrankdeficient_dep end # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights - _wts = if isa(wts, AbstractWeights) + _wts = if wts isa AbstractWeights wts - elseif isa(wts, AbstractVector) + elseif wts isa AbstractVector Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. 
Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) From ca15eb89fb808750dd40589c89fd5c7afd36dd98 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sat, 24 Sep 2022 18:52:46 +0200 Subject: [PATCH 047/106] Small fix --- docs/src/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 9a1137bc..2da0d83a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -218,13 +218,13 @@ regardless of the type of weights. ```jldoctest weights julia> loglikelihood(m_aweights) --16.29630756138424 +-16.296307561384253 julia> loglikelihood(m_fweights) -25.51860961756451 julia> loglikelihood(m_pweights) --16.29630756138424 +16.296307561384253 ``` ## Comparing models with F-test From 0c18ae96e86d6caed0bf6b9fb9c44c76350881c4 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sun, 25 Sep 2022 11:17:19 +0200 Subject: [PATCH 048/106] Small fix --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 2da0d83a..cb8c78b1 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -224,7 +224,7 @@ julia> loglikelihood(m_fweights) -25.51860961756451 julia> loglikelihood(m_pweights) -16.296307561384253 +-16.296307561384253 ``` ## Comparing models with F-test From 54d68d113d7d23927e12ccb10a0706f5dcfee47b Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 13:06:27 +0200 Subject: [PATCH 049/106] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/glmfit.jl | 10 ++++++---- src/linpred.jl | 3 +-- src/lm.jl | 2 +- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index aa76d503..70083340 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -37,7 +37,8 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec ## We don't support custom types of weights that a user may define if !(wts isa Union{FrequencyWeights, AnalyticWeights, 
ProbabilityWeights, UnitWeights}) - throw(ArgumentError("The type of `wts` was $W. The supported weights type are `FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights`, or a `UnitWeights`.")) + throw(ArgumentError("The type of `wts` was $W. The supported weights types are " * + "`FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights` and `UnitWeights`.")) end # Lengths of y, η, and η all need to be n @@ -759,7 +760,6 @@ function residuals(r::GlmResp; weighted::Bool=false) y, η, μ = r.y, r.eta, r.mu dres = similar(μ) - @inbounds for i in eachindex(y, μ) μi = μ[i] yi = y[i] @@ -785,6 +785,8 @@ function momentmatrix(m::GeneralizedLinearModel) return (X .* r) ./ d end -variancestructure(rr::GlmResp{<: Any, <: Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, r) = 1 -variancestructure(rr::GlmResp{<: Any, <: Union{Gamma, Geometric, InverseGaussian}}, r) = +variancestructure(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, + r::AbstractArray) = 1 +variancestructure(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, + r::AbstractArray) = sum(abs2, r)/sum(rr.wrkwt) diff --git a/src/linpred.jl b/src/linpred.jl index 341088f1..ce4113e7 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -107,8 +107,7 @@ function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) T = eltype(F) scr = similar(X, T) else - T = promote_type(eltype(wts), eltype(X)) - T = promote_type(eltype(float(zero(T))), T) + T = float(promote_type(eltype(wts), eltype(X))) scr = similar(X, T) mul!(scr, Diagonal(wts), X) F = Hermitian(float(scr'X)) diff --git a/src/lm.jl b/src/lm.jl index c96b8567..ed76003a 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -352,7 +352,7 @@ function StatsBase.cooksdistance(obj::LinearModel) hii = diag(X * inv(XtX) * X') D = @. 
u^2 * (hii / (1 - hii)^2) / (k*mse) else - pp = obj.pp + pp = obj.pp C = invchol(pp) nancols = [all(isnan, col) for col in eachcol(C)] nnancols = .!nancols From d6d4e6b0c0da3401a0bff197914db33f0fb14d0f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 16:52:53 +0200 Subject: [PATCH 050/106] Fix vcov dispatch for vcov --- src/linpred.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linpred.jl b/src/linpred.jl index ce4113e7..88437055 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -258,7 +258,7 @@ function vcov(x::LinPredModel) _vcov(x.pp, u, d) end -_vcov(pp::LinPred, u::AbstractVector, d::Real) = rmul!(invchol(pp), d) +_vcov(pp::DensePredChol{T, <:Cholesky, <:Any}, u::AbstractVector, d::Real) where {T} = rmul!(invchol(pp), d) function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts From b457d74a8ad8932e9675091f27ea05cf2724b6fd Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 16:57:22 +0200 Subject: [PATCH 051/106] Fix dispatch of _vcov --- src/linpred.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linpred.jl b/src/linpred.jl index 88437055..6dd79c3c 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -258,7 +258,7 @@ function vcov(x::LinPredModel) _vcov(x.pp, u, d) end -_vcov(pp::DensePredChol{T, <:Cholesky, <:Any}, u::AbstractVector, d::Real) where {T} = rmul!(invchol(pp), d) +_vcov(pp::DensePredChol{T, <:Union{Cholesky, CholeskyPivoted}, <:Any}, u::AbstractVector, d::Real) where {T} = rmul!(invchol(pp), d) function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts From b087679f997590401892febee7d56f3d1f044f91 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 17:29:53 +0200 Subject: [PATCH 052/106] Revert changes --- src/linpred.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linpred.jl b/src/linpred.jl 
index 6dd79c3c..ce4113e7 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -258,7 +258,7 @@ function vcov(x::LinPredModel) _vcov(x.pp, u, d) end -_vcov(pp::DensePredChol{T, <:Union{Cholesky, CholeskyPivoted}, <:Any}, u::AbstractVector, d::Real) where {T} = rmul!(invchol(pp), d) +_vcov(pp::LinPred, u::AbstractVector, d::Real) = rmul!(invchol(pp), d) function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} wts = pp.wts From a44e137f8f1f22c811a3c158616cc7f759408dee Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 17:41:54 +0200 Subject: [PATCH 053/106] Update src/glmfit.jl Co-authored-by: Milan Bouchet-Valat --- src/glmfit.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 70083340..a94c5736 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -503,7 +503,7 @@ function StatsBase.fit!(m::AbstractGLM, r = m.rr V = typeof(r.y) - r.y = copy!(r.y, y) + copy!(r.y, y) isa(offset, Nothing) || copy!(r.offset, offset) initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset) updateμ!(r, r.eta) From 11db2c4e81cffc7c163a1b29e332c75efdf77db7 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 17:45:11 +0200 Subject: [PATCH 054/106] Fix weighted keyword in modelmatrix --- src/linpred.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index ce4113e7..7b5454d7 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -311,8 +311,8 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted::Bool=false) - if isweighted(obj) +function modelmatrix(obj::LinPredModel; weighted::Bool=isweighted(obj)) + if weighted mul!(obj.pp.scratchm1, Diagonal(sqrt.(obj.pp.wts)), obj.pp.X) else obj.pp.X From b649d4f0ef62f076d60b73b16874ab5fc77942be Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 3 Oct 2022 17:46:29 +0200 Subject: [PATCH 055/106] perf in nulldeviance for 
unweighted models --- src/lm.jl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/lm.jl b/src/lm.jl index ed76003a..684533ef 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -206,7 +206,7 @@ function nulldeviance(obj::LinearModel) wts = weights(obj) if hasintercept(obj) - m = mean(y, wts) + m = mean(y, wts) else @warn("Starting from GLM.jl 1.8, null model is defined as having no predictor at all " * "when a model without an intercept is passed.") @@ -214,8 +214,14 @@ function nulldeviance(obj::LinearModel) end v = zero(eltype(y))*zero(eltype(wts)) - @inbounds @simd for i = eachindex(y,wts) - v += abs2(y[i] - m)*wts[i] + if wts isa UnitWeights + @inbounds @simd for i = eachindex(y,wts) + v += abs2(y[i] - m) + end + else + @inbounds @simd for i = eachindex(y,wts) + v += abs2(y[i] - m)*wts[i] + end end return v end From 29c43cb893412cc99d5ae7961ccc3cfd4db6d89e Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 19 Oct 2022 19:15:42 +0200 Subject: [PATCH 056/106] Fixed std error for probability weights --- src/glmfit.jl | 19 ++++++++++-------- src/linpred.jl | 53 ++++++++++++++++++-------------------------------- src/lm.jl | 4 ++++ 3 files changed, 34 insertions(+), 42 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index a94c5736..cd75085e 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -322,6 +322,10 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} return ll end +function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) +end + function nullloglikelihood(m::GeneralizedLinearModel) r = m.rr wts = weights(m) @@ -780,13 +784,12 @@ momentmatrix(m::RegressionModel) = momentmatrix(m.model) function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) - r = m.rr.wrkwt .* m.rr.wrkresid - d = variancestructure(m.rr, r) - return (X .* r) ./ d + r = m.rr.wrkwt .* 
m.rr.wrkresid + return mul!(pp.scratchm1, Diagonal(r), pp.X) end -variancestructure(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, - r::AbstractArray) = 1 -variancestructure(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, - r::AbstractArray) = - sum(abs2, r)/sum(rr.wrkwt) +# variancestructure(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, +# r::AbstractArray) = 1 +# variancestructure(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, +# r::AbstractArray) = +# sum(abs2, r)/sum(rr.wrkwt) diff --git a/src/linpred.jl b/src/linpred.jl index 7b5454d7..fe510264 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -137,7 +137,7 @@ end cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal - X = p.wts isa UnitWeights ? copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) + X = p.wts isa UnitWeights ? 
p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) p end @@ -252,49 +252,34 @@ end invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2)) +working_residuals(x::LinPredModel) = x.rr.wrkresid +working_weights(x::LinPredModel) = x.rr.wrkwt + function vcov(x::LinPredModel) d = dispersion(x, true) - u = residuals(x; weighted = isweighted(x)) - _vcov(x.pp, u, d) + u = working_residuals(x).*working_weights(x) + V = vcov(x.pp, u, d) + return (nobs(x)/dof_residual(x)).*V end -_vcov(pp::LinPred, u::AbstractVector, d::Real) = rmul!(invchol(pp), d) - -function _vcov(pp::DensePredChol{T, <:Cholesky, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} - wts = pp.wts - Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) - XtW2X = Z'Z - invXtWX = invchol(pp) - V = invXtWX*XtW2X*invXtWX - n = length(wts) - k = length(pp.delbeta) - n/(n-k)*V -end - -function _vcov(pp::DensePredChol{T, <:CholeskyPivoted, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T} - wts = pp.wts - Z = mul!(pp.scratchm1, Diagonal(sqrt.(wts).*u), pp.X) - ch = pp.chol - rnk = rank(ch) - p = length(pp.delbeta) - invXtWX = invchol(pp) - if rnk == p - B = Z'Z - A = invXtWX - V = A*B*A - else - nancols = [all(isnan, col) for col in eachcol(invXtWX)] +function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C} + Z = mul!(pp.scratchm1, Diagonal(u), pp.X) + @show Z + A = invchol(pp) + if C isa CholeskyPivoted && rank(pp.chol) != size(B, 1) + nancols = [all(isnan, col) for col in eachcol(B)] nnancols = .!nancols Zc = view(Z, :, nnancols) - B = Zc'Zc - A = view(invXtWX, nnancols, nnancols) + B = view(Zc'Zc, nnancols, nnancols) V = similar(pp.scratchm2) - V[nnancols, nnancols] = A*B*A + V[nnancols, nnancols] .= A*B*A V[nancols, :] .= NaN V[:, nancols] .= NaN + else + B = mul!(pp.scratchm2, Z', Z) + V = A*B*A end - n = length(wts) - n/(n-rnk)*V + return V end function 
cor(x::LinPredModel) diff --git a/src/lm.jl b/src/lm.jl index 684533ef..5c38a1c6 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -77,6 +77,10 @@ function loglikelihood(r::LmResp{T,<:AbstractWeights}) where T 0.5*(n - N * (log(2π * deviance(r)/N) + 1)) end +function loglikelihood(r::LmResp{T,<:ProbabilityWeights}) where T + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) +end + function residuals(r::LmResp; weighted=false) wts = weights(r) res = r.y - r.mu From 279e5339008d3b8082d5c5f931397308ae450dff Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 20 Oct 2022 18:13:47 +0200 Subject: [PATCH 057/106] Getting there (& switch Analytics to Importance) --- src/GLM.jl | 18 ++++++- src/glmfit.jl | 38 +++++++------- src/linpred.jl | 39 +++++++++++---- src/lm.jl | 14 +++--- test/runtests.jl | 128 ++++++++++++++++++++++++++++++++++++++++++----- 5 files changed, 189 insertions(+), 48 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index ea6b1390..05290281 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -19,8 +19,22 @@ module GLM export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², - cooksdistance, hasintercept, dispersion, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights, - UnitWeights, aweights, fweights, pweights + cooksdistance, hasintercept, dispersion, weights, ImportanceWeights, ProbabilityWeights, FrequencyWeights, + UnitWeights, uweights, fweights, pweights, iweights + + + ## Should eventually be added to StatsBase + """ + iweights(vs) + Construct an `ImportanceWeights` vector from array `vs`. + See the documentation for [`ImportanceWeights`](@ref) for more details. 
+ """ + const RealArray{T<:Real,N} = AbstractArray{T,N} + const RealVector{T<:Real} = AbstractArray{T,1} + + StatsBase.@weights ImportanceWeights + iweights(vs::Vector{<:Real}) = ImportanceWeights(vs) + iweights(vs::RealArray) = ImportanceWeights(vec(vs)) export # types diff --git a/src/glmfit.jl b/src/glmfit.jl index cd75085e..255b418c 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -36,9 +36,9 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec checky(y, d) ## We don't support custom types of weights that a user may define - if !(wts isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights, UnitWeights}) + if !(wts isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights, UnitWeights}) throw(ArgumentError("The type of `wts` was $W. The supported weights types are " * - "`FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights` and `UnitWeights`.")) + "`FrequencyWeights`, `ImportanceWeights`, `ProbabilityWeights` and `UnitWeights`.")) end # Lengths of y, η, and η all need to be n @@ -76,7 +76,7 @@ GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, deviance(r::GlmResp) = sum(r.devresid) weights(r::GlmResp) = r.wts -isweighted(r::GlmResp) = weights(r) isa Union{AnalyticWeights, FrequencyWeights, ProbabilityWeights} +isweighted(r::GlmResp) = weights(r) isa Union{ImportanceWeights, FrequencyWeights, ProbabilityWeights} """ cancancel(r::GlmResp{V,D,L}) @@ -302,8 +302,7 @@ loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} y = r.y mu = r.mu - wts = weights(r) - sumwt = sum(wts) + wts = weights(r) d = r.d ll = zero(eltype(mu)) n = nobs(r) @@ -314,18 +313,19 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} @inbounds for i in eachindex(y, mu) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end + elseif wts isa ImportanceWeights + @inbounds for i in eachindex(y, mu, wts) + #ll += loglik_obs(d, y[i], 
mu[i], wts[i], ϕ) + ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, wts.sum, N) + end else @inbounds for i in eachindex(y, mu, wts) - ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, sumwt, N) + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) end end return ll end -function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} - throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) -end - function nullloglikelihood(m::GeneralizedLinearModel) r = m.rr wts = weights(m) @@ -582,7 +582,7 @@ function fit(::Type{M}, wts elseif wts isa AbstractVector Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * - "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) else @@ -784,12 +784,14 @@ momentmatrix(m::RegressionModel) = momentmatrix(m.model) function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) - r = m.rr.wrkwt .* m.rr.wrkresid - return mul!(pp.scratchm1, Diagonal(r), pp.X) + r = m.rr.wrkwt .* m.rr.wrkresid + d = varstruct(m.rr, r) + return mul!(m.pp.scratchm1, Diagonal(r.*d), m.pp.X) end -# variancestructure(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, -# r::AbstractArray) = 1 -# variancestructure(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, -# r::AbstractArray) = -# sum(abs2, r)/sum(rr.wrkwt) +#res <- res * sum(weights(x, "working"), na.rm = TRUE)/sum(res^2, na.rm = TRUE) + +varstruct(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, + r::AbstractArray) = 1 +varstruct(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, + r::AbstractArray) = sum(rr.wrkwt)/sum(abs2, r) diff --git a/src/linpred.jl 
b/src/linpred.jl index fe510264..46fb35fd 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -259,29 +259,48 @@ function vcov(x::LinPredModel) d = dispersion(x, true) u = working_residuals(x).*working_weights(x) V = vcov(x.pp, u, d) - return (nobs(x)/dof_residual(x)).*V + if x.pp.wts isa ProbabilityWeights + V*(nobs(x)/dof_residual(x)) + else + V + end end +vcov(x::DensePredChol{T, C, P}, u::AbstractVector, d::Real) where {T,C,P} = rmul!(invchol(x), d) +vcov(x::SparsePredChol{T, C, M, P}, u::AbstractVector, d::Real) where {T,C,M,P} = rmul!(invchol(x), d) + function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C} Z = mul!(pp.scratchm1, Diagonal(u), pp.X) - @show Z A = invchol(pp) - if C isa CholeskyPivoted && rank(pp.chol) != size(B, 1) - nancols = [all(isnan, col) for col in eachcol(B)] + if pp.chol isa CholeskyPivoted && rank(pp.chol) != size(A, 1) + nancols = [all(isnan, col) for col in eachcol(A)] nnancols = .!nancols - Zc = view(Z, :, nnancols) - B = view(Zc'Zc, nnancols, nnancols) + Zv = view(Z, :, nnancols) + B = Zv'Zv + Av = view(A, nnancols, nnancols) V = similar(pp.scratchm2) - V[nnancols, nnancols] .= A*B*A + V[nnancols, nnancols] .= Av*B*Av V[nancols, :] .= NaN V[:, nancols] .= NaN else B = mul!(pp.scratchm2, Z', Z) V = A*B*A - end - return V + end + #n = length(pp.wts) + #df_correction = n/(n-rank(pp.chol)) + return V#*df_correction end +function vcov(pp::SparsePredChol{T, C, M, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C, M} + ## Note: SparsePredChol does not handle rankdeficient cases + Z = mul!(pp.scratchm1, Diagonal(u), pp.X) + A = invchol(pp) + B = Z'*Z + V = A*B*A + return V +end + + function cor(x::LinPredModel) Σ = vcov(x) invstd = inv.(sqrt.(diag(Σ))) @@ -317,7 +336,7 @@ weights(obj::RegressionModel) = weights(obj.model) weights(obj::LinPredModel) = weights(obj.rr) isweighted(obj::RegressionModel) = isweighted(obj.model) -isweighted(obj::LinPredModel) = weights(obj) isa 
Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights} +isweighted(obj::LinPredModel) = weights(obj) isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights} coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) diff --git a/src/lm.jl b/src/lm.jl index 5c38a1c6..cee5d8c8 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -128,7 +128,7 @@ const FIT_LM_DOC = """ The keyword argument `wts` can be an `AbstractWeights` vector specifying prior weights for observations. Allowed types are: - `UnitWeights`: no weighting (all weights equal to 1). - - `AnalyticaWeights`: describe a non-random relative importance (usually between 0 and 1) + - `Analyticiweights`: describe a non-random relative importance (usually between 0 and 1) for each observation. - `FrequencyWeights`: describe the number of times (or frequency) each observation was seen. - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, @@ -162,11 +162,11 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< dropcollinear = allowrankdeficient_dep end # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights - _wts = if wts isa AbstractWeights + _wts = if wts isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights, UnitWeights} wts elseif wts isa AbstractVector Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * - "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. 
Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) else @@ -207,8 +207,7 @@ For linear models, the deviance of the null model is equal to the total sum of s """ function nulldeviance(obj::LinearModel) y = obj.rr.y - wts = weights(obj) - + wts = obj.pp.wts if hasintercept(obj) m = mean(y, wts) else @@ -225,7 +224,7 @@ function nulldeviance(obj::LinearModel) else @inbounds @simd for i = eachindex(y,wts) v += abs2(y[i] - m)*wts[i] - end + end end return v end @@ -248,6 +247,9 @@ loglikelihood(obj::LinearModel) = loglikelihood(obj.rr) r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) adjr2(obj::LinearModel) = 1 - (1 - r²(obj))*(nobs(obj)-hasintercept(obj))/dof_residual(obj) +working_residuals(x::LinearModel) = residuals(x) +working_weights(x::LinearModel) = x.pp.wts + function dispersion(x::LinearModel, sqr::Bool=false) dofr = dof_residual(x) ssqr = deviance(x.rr)/dofr diff --git a/test/runtests.jl b/test/runtests.jl index 2adb819a..7568c223 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -110,8 +110,8 @@ end @test isa(weights(lm_model), FrequencyWeights) @test isa(weights(glm_model), FrequencyWeights) - lm_model = lm(f, df, wts = aweights(df.weights)) - glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) + lm_model = lm(f, df, wts = iweights(df.weights)) + glm_model = glm(f, df, Normal(), wts = iweights(df.weights)) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) @@ -129,8 +129,9 @@ end lm_model = lm(f, df, wts = pweights(df.weights)) glm_model = glm(f, df, Normal(), wts = pweights(df.weights)) - @test vcov(lm_model) ≈ [2230.3626444482406 -2.423827176758377; -2.4238271767583766 0.0026792687760410199] - @test vcov(glm_model) ≈ [2230.3626444482406 -2.423827176758377; -2.4238271767583766 0.0026792687760410199] + ## Standard errors from STATA + 
@test stderror(lm_model) ≈ [ 47.22671, .0517617] atol=1e-05 + @test stderror(glm_model) ≈ [ 47.22671, .0517617] atol=1e-05 ## Test the non full rank case df.Income2 = df.Income*2 @@ -192,10 +193,10 @@ end @testset "Passing wts (depwarn)" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3]) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " * - "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " * - "`AnalyticalWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`") glm(@formula(y~x), df, Normal(), IdentityLink(); wts=df.wts) end @@ -609,10 +610,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], end -@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin +@testset "Aggregated Binomial LogitLink (ImportanceWeights)" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=aweights(admit_agr.count)) + wts=iweights(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 8 @test isapprox(deviance(gm14), 474.9667184280627) @@ -907,10 +908,10 @@ end @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) end - gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), - fit(LinearModel, X, sparse(y); wts=aweights(wts)), - fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] - gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) + gmsparsev = [fit(LinearModel, X, y; wts=iweights(wts)), + fit(LinearModel, X, sparse(y); wts=iweights(wts)), + 
fit(LinearModel, Matrix(X), sparse(y); wts=iweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=iweights(wts)) for gmsparse in gmsparsev @test isapprox(deviance(gmsparse), deviance(gmdense)) @@ -918,6 +919,20 @@ end @test isapprox(vcov(gmsparse), vcov(gmdense)) @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) end + + gmsparsev = [fit(LinearModel, X, y; wts=pweights(wts)), + fit(LinearModel, X, sparse(y); wts=pweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=pweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=pweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end + + end @testset "Predict" begin @@ -1551,3 +1566,92 @@ end @test predict(mdl1) ≈ predict(mdl2) end end + +@testset "momentmatrix" begin + @testset "Poisson" begin + dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], + Outcome = categorical(repeat(string.('A':'C'), outer = 3)), + Treatment = categorical(repeat(string.('a':'c'), inner = 3)), + Weights = [0.3, 0.2, .9, .8, .2, .3, .4, .8, .9]) + + f = @formula(Counts ~ 1 + Outcome + Treatment) + + gm_pois = fit(GeneralizedLinearModel, f, dobson, Poisson()) + + mm0_pois = [-2.9999999792805436 -0.0 -0.0 -0.0 -0.0; + 3.666666776430482 3.666666776430482 0.0 0.0 0.0; + -0.6666666790442577 -0.0 -0.6666666790442577 -0.0 -0.0; + -1.0000000123284563 -0.0 -0.0 -1.0000000123284563 -0.0; + -3.3333334972350723 -3.3333334972350723 -0.0 -3.3333334972350723 -0.0; + 4.333333497138949 0.0 4.333333497138949 4.333333497138949 0.0; + 4.000000005907649 0.0 0.0 0.0 4.000000005907649; + -0.33333334610634496 -0.33333334610634496 -0.0 -0.0 -0.33333334610634496; + -3.6666667654825043 -0.0 -3.6666667654825043 -0.0 -3.6666667654825043] + + gm_poisw = 
fit(GeneralizedLinearModel, f, dobson, Poisson(), wts = dobson.Weights) + + mm0_poisw = [-0.9624647521850039 -0.0 -0.0 -0.0 -0.0; + 0.6901050904949885 0.6901050904949885 0.0 0.0 0.0; + 0.2723596655008255 0.0 0.2723596655008255 0.0 0.0; + -0.9062167634177802 -0.0 -0.0 -0.9062167634177802 -0.0; + -0.7002548908882033 -0.7002548908882033 -0.0 -0.7002548908882033 -0.0; + 1.606471661159352 0.0 1.606471661159352 1.606471661159352 0.0; + 1.8686815106332157 0.0 0.0 0.0 1.8686815106332157; + 0.010149793505874801 0.010149793505874801 0.0 0.0 0.010149793505874801; + -1.8788313148033928 -0.0 -1.8788313148033928 -0.0 -1.8788313148033928] + + + + @test mm0_pois ≈ GLM.momentmatrix(gm_pois) atol=1e-06 + @test mm0_poisw ≈ GLM.momentmatrix(gm_poisw) atol=1e-06 + end + @testset "Binomial" begin + f = @formula(admit ~ 1 + rank) + + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial()) + mm0_bin = [-0.5 -0.0 -0.0 -0.0 + -0.5 -0.5 -0.0 -0.0 + -0.5 -0.0 -0.5 -0.0 + -0.5 -0.0 -0.0 -0.5 + 0.5 0.0 0.0 0.0 + 0.5 0.5 0.0 0.0 + 0.5 0.0 0.5 0.0 + 0.5 0.0 0.0 0.5] + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) + + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=iweights(admit_agr.count)) + mm0_binw = [-15.1475 -0.0 -0.0 -0.0 + -34.6887 -34.6887 -0.0 -0.0 + -21.5207 -0.0 -21.5207 -0.0 + -9.85075 -0.0 -0.0 -9.85075 + 15.1475 0.0 0.0 0.0 + 34.6887 34.6887 0.0 0.0 + 21.5207 0.0 21.5207 0.0 + 9.85075 0.0 0.0 9.85075] + + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-03 + Vcov =[ 0.0660173 -0.0660173 -0.0660173 -0.0660173 + -0.0660173 0.0948451 0.0660173 0.0660173 + -0.0660173 0.0660173 0.112484 0.0660173 + -0.0660173 0.0660173 0.0660173 0.167532] + + ## This is due to divverences between chol and qr + @test vcov(gm_binw) ≈ Vcov atol=1e-03 + + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=pweights(admit_agr.count)) + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-03 + ## This are obtained from stata + ## glm admit i.rank [pweight=count], 
family(binomial) irls + coef_stata = [.16430305, -.75002998, -1.364698, -1.6867296] + @test coef(gm_binw) ≈ coef_stata atol=1e-05 + ## Stata: uses different residuals degrees of freedom. In this case (n-1) instead of (n-4) + ## Also need to give low tolerance (this small differences seem to be due to QR vs Cholesky) + @test stderror(gm_binw)*sqrt(5/7) ≈ [1.5118579, 2.1380899, 2.1380899, 2.1380899] atol=1e-02 + + ## Stata is also off with fweights + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=fweights(admit_agr.count)) + ## vs Stata (here stata uses the same df) + stata_se = [.25693835, .30796933, .33538667, .4093073] + @test stderror(gm_binw) ≈ stata_se atol = 0.001 + end +end \ No newline at end of file From afb145e76b1021e21d67d6eb9c1cf1a1b9175752 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 20 Oct 2022 18:30:13 +0200 Subject: [PATCH 058/106] .= instead of copy! --- src/scratch.jl | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/scratch.jl diff --git a/src/scratch.jl b/src/scratch.jl new file mode 100644 index 00000000..051f723e --- /dev/null +++ b/src/scratch.jl @@ -0,0 +1,69 @@ +using Revise +using GLM +using DataFrames +using Random +using CSV +using StatsBase +using RDatasets +Random.seed!(11) + +y = rand(10) +x = rand(10,2) +wts = rand(10) +df = DataFrame(x, :auto) +df.y = y +df.wts = wts +lm1 = lm(x,y) +lmw = lm(x,y; wts = wts) +lmf = lm(@formula(y~x1+x2-1), df) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = iweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) + +glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) + +cooksdistance(lm1) + + + +df = dataset("quantreg", "engel") +N = nrow(df) +df.weights = repeat(1:5, Int(N/5)) +f = @formula(FoodExp ~ Income) +lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) +glm_model = glm(f, df, Normal(), wts = 
FrequencyWeights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + +lm_model = lm(f, df, wts = df.weights) +glm_model = glm(f, df, Normal(), wts = df.weights) +@test isa(weights(lm_model), FrequencyWeights) +@test isa(weights(glm_model), FrequencyWeights) + + + + +lm_model = lm(f, df, wts = iweights(df.weights)) +glm_model = glm(f, df, Normal(), wts = iweights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.8323091874604334) +@test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; + -0.20434035947652907 0.00020126512195323495]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) \ No newline at end of file From 2cead0a5b4d043bdeffb0da64e8d05c9d2d59346 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: 
Thu, 20 Oct 2022 18:41:21 +0200 Subject: [PATCH 059/106] Remove comments --- src/glmfit.jl | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 255b418c..3c9171cc 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -757,9 +757,6 @@ end nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts) -##To be reviewed! -# Base.sqrt(::UnitWeights{T}) where T = one(T) - function residuals(r::GlmResp; weighted::Bool=false) y, η, μ = r.y, r.eta, r.mu dres = similar(μ) @@ -789,8 +786,6 @@ function momentmatrix(m::GeneralizedLinearModel) return mul!(m.pp.scratchm1, Diagonal(r.*d), m.pp.X) end -#res <- res * sum(weights(x, "working"), na.rm = TRUE)/sum(res^2, na.rm = TRUE) - varstruct(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, r::AbstractArray) = 1 varstruct(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, From a1ec49f7f4458e72ca7583bcf573b465f8776bd1 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 20 Oct 2022 18:43:32 +0200 Subject: [PATCH 060/106] up --- src/linpred.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linpred.jl b/src/linpred.jl index 46fb35fd..890ebebc 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -144,7 +144,7 @@ end function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol - X = p.wts isa UnitWeights ? copy!(p.scratchm1, p.X) : mul!(p.scratchm1, Diagonal(p.wts), p.X) + X = p.wts isa UnitWeights ? 
p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) delbeta = mul!(p.delbeta, adjoint(X), r) rnk = rank(ch) if rnk == length(delbeta) From 97bf28de6ff7a607b7fa0404a9e0460ee735aab8 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Sun, 23 Oct 2022 17:37:00 +0200 Subject: [PATCH 061/106] Speedup cooksdistance --- src/linpred.jl | 26 +++++++++++++++----------- src/lm.jl | 38 +++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 890ebebc..16291946 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -285,10 +285,8 @@ function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, else B = mul!(pp.scratchm2, Z', Z) V = A*B*A - end - #n = length(pp.wts) - #df_correction = n/(n-rank(pp.chol)) - return V#*df_correction + end + return V end function vcov(pp::SparsePredChol{T, C, M, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C, M} @@ -315,12 +313,15 @@ end modelframe(obj::LinPredModel) = obj.fr -function modelmatrix(obj::LinPredModel; weighted::Bool=isweighted(obj)) - if weighted - mul!(obj.pp.scratchm1, Diagonal(sqrt.(obj.pp.wts)), obj.pp.X) +modelmatrix(obj::LinPredModel; weighted::Bool=isweighted(obj)) = modelmatrix(obj.pp; weighted=weighted) + +function modelmatrix(pp::LinPred; weighted::Bool=isweighted(obj)) + Z = if weighted + mul!(pp.scratchm1, Diagonal(sqrt.(pp.wts)), pp.X) else - obj.pp.X + pp.X end + return Z end response(obj::LinPredModel) = obj.rr.y @@ -333,10 +334,13 @@ residuals(obj::LinPredModel; weighted::Bool=false) = residuals(obj.rr; weighted= nobs(obj::LinPredModel) = nobs(obj.rr) weights(obj::RegressionModel) = weights(obj.model) -weights(obj::LinPredModel) = weights(obj.rr) +weights(m::LinPredModel) = weights(m.rr) +weights(pp::LinPred) = pp.wts + +isweighted(obj::RegressionModel) = isweighted(obj.model.pp) +isweighted(m::LinPredModel) = isweighted(m.pp) +isweighted(pp::LinPred) = weights(pp) isa Union{FrequencyWeights, 
ImportanceWeights, ProbabilityWeights} -isweighted(obj::RegressionModel) = isweighted(obj.model) -isweighted(obj::LinPredModel) = weights(obj) isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights} coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) diff --git a/src/lm.jl b/src/lm.jl index cee5d8c8..84472d57 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -343,35 +343,35 @@ end Compute [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation in linear model `obj`, giving an estimate of the influence of each data point. -Currently only implemented for linear models without weights. """ ## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged function crossmodelmatrix(model::RegressionModel; weighted::Bool=false) x = weighted ? modelmatrix(model; weighted=weighted) : modelmatrix(model) return Symmetric(x' * x) end - + +hatvalues(x::LinPredModel) = hatvalues(x.pp) + +function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} + X = modelmatrix(pp; weighted=isweighted(pp)) + _, k = size(X) + ch = pp.chol + rnk = rank(ch) + p = ch.p + idx = invperm(p)[1:rnk] + sum((view(X,:,1:rnk)/ch.U[1:rnk, idx]).^2, dims=2) +end + +function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} + X = modelmatrix(pp; weighted=isweighted(pp)) + sum((X/pp.chol.U).^2, dims=2) +end function StatsBase.cooksdistance(obj::LinearModel) - wts = weights(obj) u = residuals(obj; weighted=isweighted(obj)) mse = GLM.dispersion(obj,true) k = dof(obj)-1 - d_res = dof_residual(obj) - X = modelmatrix(obj; weighted=isweighted(obj)) - if k == size(X,2) - XtX = crossmodelmatrix(obj; weighted=isweighted(obj)) - hii = diag(X * inv(XtX) * X') - D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) - else - pp = obj.pp - C = invchol(pp) - nancols = [all(isnan, col) for col in eachcol(C)] - nnancols = .!nancols - Xc = view(X, :, nnancols) - XtX = (Xc)'*Xc - hii = diag(Xc * inv(XtX) * Xc') - D = @. 
u^2 * (hii / (1 - hii)^2) / (k*mse) - end + hii = hatvalues(obj) + D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end From 9ce2d8986ee4dfb30d87782088002ab919ccc827 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 24 Oct 2022 23:06:56 +0200 Subject: [PATCH 062/106] Revert back to AnalyticWeights --- src/GLM.jl | 18 ++---------------- src/glmfit.jl | 10 +++++----- src/linpred.jl | 2 +- src/lm.jl | 6 +++--- 4 files changed, 11 insertions(+), 25 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index 05290281..188d251c 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -19,22 +19,8 @@ module GLM export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², - cooksdistance, hasintercept, dispersion, weights, ImportanceWeights, ProbabilityWeights, FrequencyWeights, - UnitWeights, uweights, fweights, pweights, iweights - - - ## Should eventually be added to StatsBase - """ - iweights(vs) - Construct an `ImportanceWeights` vector from array `vs`. - See the documentation for [`ImportanceWeights`](@ref) for more details. 
- """ - const RealArray{T<:Real,N} = AbstractArray{T,N} - const RealVector{T<:Real} = AbstractArray{T,1} - - StatsBase.@weights ImportanceWeights - iweights(vs::Vector{<:Real}) = ImportanceWeights(vs) - iweights(vs::RealArray) = ImportanceWeights(vec(vs)) + cooksdistance, hasintercept, dispersion, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights, + UnitWeights, uweights, fweights, pweights, aweights export # types diff --git a/src/glmfit.jl b/src/glmfit.jl index 3c9171cc..195b72f1 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -36,9 +36,9 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVec checky(y, d) ## We don't support custom types of weights that a user may define - if !(wts isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights, UnitWeights}) + if !(wts isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights, UnitWeights}) throw(ArgumentError("The type of `wts` was $W. The supported weights types are " * - "`FrequencyWeights`, `ImportanceWeights`, `ProbabilityWeights` and `UnitWeights`.")) + "`FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights` and `UnitWeights`.")) end # Lengths of y, η, and η all need to be n @@ -76,7 +76,7 @@ GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, deviance(r::GlmResp) = sum(r.devresid) weights(r::GlmResp) = r.wts -isweighted(r::GlmResp) = weights(r) isa Union{ImportanceWeights, FrequencyWeights, ProbabilityWeights} +isweighted(r::GlmResp) = weights(r) isa Union{AnalyticWeights, FrequencyWeights, ProbabilityWeights} """ cancancel(r::GlmResp{V,D,L}) @@ -313,7 +313,7 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} @inbounds for i in eachindex(y, mu) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end - elseif wts isa ImportanceWeights + elseif wts isa AnalyticWeights @inbounds for i in eachindex(y, mu, wts) #ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, 
wts.sum, N) @@ -582,7 +582,7 @@ function fit(::Type{M}, wts elseif wts isa AbstractVector Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * - "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) else diff --git a/src/linpred.jl b/src/linpred.jl index 16291946..6dc009b2 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -339,7 +339,7 @@ weights(pp::LinPred) = pp.wts isweighted(obj::RegressionModel) = isweighted(obj.model.pp) isweighted(m::LinPredModel) = isweighted(m.pp) -isweighted(pp::LinPred) = weights(pp) isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights} +isweighted(pp::LinPred) = weights(pp) isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights} coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) diff --git a/src/lm.jl b/src/lm.jl index 84472d57..4dd36e0a 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -128,7 +128,7 @@ const FIT_LM_DOC = """ The keyword argument `wts` can be an `AbstractWeights` vector specifying prior weights for observations. Allowed types are: - `UnitWeights`: no weighting (all weights equal to 1). - - `Analyticiweights`: describe a non-random relative importance (usually between 0 and 1) + - `Analyticaweights`: describe a non-random relative importance (usually between 0 and 1) for each observation. - `FrequencyWeights`: describe the number of times (or frequency) each observation was seen. 
- `ProbabilityWeights`: represent the inverse of the sampling probability for each observation, @@ -162,11 +162,11 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< dropcollinear = allowrankdeficient_dep end # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights - _wts = if wts isa Union{FrequencyWeights, ImportanceWeights, ProbabilityWeights, UnitWeights} + _wts = if wts isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights, UnitWeights} wts elseif wts isa AbstractVector Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * - "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`", :fit) fweights(wts) else From 9bddf6389826827735c35ea02a73c050cd8ae208 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 24 Oct 2022 23:07:25 +0200 Subject: [PATCH 063/106] Add extensive tests for AnalyticWeights --- analytic_weights.jl | 170 ++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 82 +++++++++++++++++---- 2 files changed, 238 insertions(+), 14 deletions(-) create mode 100644 analytic_weights.jl diff --git a/analytic_weights.jl b/analytic_weights.jl new file mode 100644 index 00000000..bf59c949 --- /dev/null +++ b/analytic_weights.jl @@ -0,0 +1,170 @@ +rng=StableRNG(123) + +x1 = rand(rng, 15) +x2 = ifelse.(randn(rng, 15).>0,1,0) + +y = ifelse.(0.004 .- 0.01.*x1 .+ 1.5.*x2 .+ randn(rng, 15).>0, 1, 0) +w = rand(rng, 15)*6 +w = floor.(w) .+ 1 + + +df = DataFrame(y=y,x1=x1,x2=x2, w=w) + +clotting = DataFrame(u = log.([5,10,15,20,30,40,60,80,100]), + lot1 = [118,58,42,35,27,25,21,19,18], + w = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + +quine.aweights = log.(3 .+ 3 .*quine.Days) +quine.pweights = 1.0./(quine.aweights./sum(quine.aweights)) +quine.fweights = floor.(quine.aweights) + +dobson = 
DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], + Outcome = categorical(repeat(string.('A':'C'), outer = 3)), + Treatment = categorical(repeat(string.('a':'c'), inner = 3)), + w = [1,2,1,2,3,4,3,2,1] + ) + + +@testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=df) + @test deviance(model) ≈ 40.80540879298288 + @test loglikelihood(model) ≈ -20.402704396491437 + @test coef(model) ≈ + @test stderror(model) ≈ [1.3764416117357274, 2.1047035048697387, 2891.0832705575244] + @test aic(model) ≈ 46.805408792982874 + @test bic(model) ≈ 48.929559396289505 + @test momentmatrix(model) ≈ [4.975291538563986e-9 9.006548481517277e-10 4.975291538563986e-9; 7.457508769849823e-9 2.7405594965454296e-9 7.457508769849823e-9; 2.285271769575856e-8 1.528980344883474e-8 2.285271769575856e-8; 2.2327974533715886e-9 9.540869852611076e-11 2.2327974533715886e-9; -3.9002435408230824 -1.7082714161861667 -0.0; 1.9456115337317996 0.9402948505761761 0.0; 3.250941922802209e-8 2.4836342240320546e-8 3.250941922802209e-8; 0.7662630435773894 0.7083972308412865 0.0; 2.0874526025016062 1.0751322798196368 0.0; 1.995938694540086e-8 9.880729193072285e-9 1.995938694540086e-8; 2.354909005285928 1.651591865013276 0.0; -1.2548308790461924 -0.8703121995275815 -0.0; -1.3365591258889113 -1.1711904042878245 -0.0; -0.6626026393385315 -0.6256422062487993 -0.0; 2.9653540079963233e-8 2.192027023041504e-8 2.9653540079963233e-8] +end + +@testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=df) + @test deviance(model) ≈ 40.80540879298288 + @test loglikelihood(model) ≈ -20.402704396491437 + @test coef(model) ≈ + @test stderror(model) ≈ [1.3764416117357274, 2.1047035048697387, 2891.0832705575244] + @test aic(model) ≈ 46.805408792982874 + @test bic(model) ≈ 48.929559396289505 + @test momentmatrix(model) ≈ [4.975291538563986e-9 9.006548481517277e-10 
4.975291538563986e-9; 7.457508769849823e-9 2.7405594965454296e-9 7.457508769849823e-9; 2.285271769575856e-8 1.528980344883474e-8 2.285271769575856e-8; 2.2327974533715886e-9 9.540869852611076e-11 2.2327974533715886e-9; -3.9002435408230824 -1.7082714161861667 -0.0; 1.9456115337317996 0.9402948505761761 0.0; 3.250941922802209e-8 2.4836342240320546e-8 3.250941922802209e-8; 0.7662630435773894 0.7083972308412865 0.0; 2.0874526025016062 1.0751322798196368 0.0; 1.995938694540086e-8 9.880729193072285e-9 1.995938694540086e-8; 2.354909005285928 1.651591865013276 0.0; -1.2548308790461924 -0.8703121995275815 -0.0; -1.3365591258889113 -1.1711904042878245 -0.0; -0.6626026393385315 -0.6256422062487993 -0.0; 2.9653540079963233e-8 2.192027023041504e-8 2.9653540079963233e-8] +end + +@testset "GLM: Binomial with ProbitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), wts=df) + @test deviance(model) ≈ 40.78137404276874 + @test loglikelihood(model) ≈ -20.39068702138437 + @test coef(model) ≈ + @test stderror(model) ≈ [0.8364009151837031, 1.2669759328012313, 383.275056480285] + @test aic(model) ≈ 46.78137404276874 + @test bic(model) ≈ 48.90552464607537 + @test momentmatrix(model) ≈ [2.141319874423096e-10 3.876335911964382e-11 2.141319874423096e-10; 2.660620461776351e-9 9.777512703308697e-10 2.660620461776351e-9; 1.6837104388797515e-7 1.1265006647327841e-7 1.6837104388797515e-7; 1.775927884351608e-11 7.588640333961262e-13 1.775927884351608e-11; -6.32108519917438 -2.768578180312368 -0.0; 3.160081293711658 1.5272360984516953 0.0; 5.617843709811802e-7 4.291885008799245e-7 5.617843709811802e-7; 1.3247656634751994 1.2247234619826348 0.0; 3.3733372105472994 1.737420874337357 0.0; 2.719173690921834e-8 1.3461044140494708e-8 2.719173690921834e-8; 3.7980026833505782 2.6636911749202743 0.0; -2.0047842518828927 -1.390456850377069 -0.0; -2.2151750132435413 -1.9410976058418117 -0.0; -1.11514238917241 -1.0529389761258265 -0.0; 4.12161049030174e-7 
3.046746374574499e-7 4.12161049030174e-7] +end + +@testset "GLM: Binomial with CauchitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=df) + @test deviance(model) ≈ 40.8975259939964 + @test loglikelihood(model) ≈ -20.4487629969982 + @test coef(model) ≈ + @test stderror(model) ≈ [1.3251544363304, 2.0816819672326212, 3.9763720496138e10] + @test aic(model) ≈ 46.8975259939964 + @test bic(model) ≈ 49.021676597303035 + @test momentmatrix(model) ≈ [3.292881549556344e-15 5.960956677632797e-16 3.292881549556344e-15; 2.469661355441319e-15 9.075757186192704e-16 2.469661355441319e-15; 2.469661724388082e-15 1.6523479987683206e-15 2.469661724388082e-15; 2.469660974846597e-15 1.055300108189554e-16 2.469660974846597e-15; -4.288393332164326 -1.8782775162685588 -0.0; 2.090455393992148 1.010296458594581 0.0; 2.469661833186925e-15 1.8867567604535068e-15 2.469661833186925e-15; 0.6503620950105018 0.6012487630862063 0.0; 2.3400182402064034 1.205214979443208 0.0; 4.116102547017224e-15 2.0376424741524278e-15 4.116102547017224e-15; 2.8310688979703755 1.9855418407609555 0.0; -1.610846692068502 -1.117233844883936 -0.0; -1.3776439416401394 -1.2071919107215705 -0.0; -0.6350206613065066 -0.5995987700109224 -0.0; 2.469661799647931e-15 1.8256051007749665e-15 2.469661799647931e-15] +end + +@testset "GLM: Binomial with CloglogLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts=df) + @test deviance(model) ≈ 41.205557080153405 + @test loglikelihood(model) ≈ -20.602778540076702 + @test coef(model) ≈ + @test stderror(model) ≈ [0.973218567746775, 1.5788168207479476, 134.2308296148033] + @test aic(model) ≈ 47.205557080153405 + @test bic(model) ≈ 49.329707683460036 + @test momentmatrix(model) ≈ [8.881784197001252e-16 1.6078297995730002e-16 8.881784197001252e-16; 6.661338147750942e-16 2.447974797472054e-16 6.661338147750942e-16; 3.3706748473230575e-8 2.2551784252367408e-8 
3.3706748473230575e-8; 6.661338147750942e-16 2.8464274811830893e-17 6.661338147750942e-16; -6.002070351507831 -2.62885256064177 -0.0; 3.090828500347432 1.4937669069612967 0.0; 2.193326882864893e-6 1.6756441179599079e-6 2.193326882864893e-6; 0.8552543543316015 0.7906681933203132 0.0; 3.211027051982352 1.653823818956702 0.0; 1.0692624807746624e-12 5.293295349074617e-13 1.0692624807746624e-12; 3.040005920969103 2.1320777309844043 0.0; -1.607851182110078 -1.1151562510789412 -0.0; -1.7153341048186206 -1.503100614703514 -0.0; -0.8718601911242182 -0.8232272271960682 -0.0; 8.127843792739411e-7 6.008204479027258e-7 8.127843792739411e-7] +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=clotting) + @test deviance(model) ≈ 0.03933389380881642 + @test loglikelihood(model) ≈ -43.359078787690514 + @test coef(model) ≈ + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] + @test aic(model) ≈ 92.71815757538103 + @test bic(model) ≈ 93.30983130738969 + @test momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; -1643.317155973023 -3783.877586404854; -420.13783432322964 -1137.7543467296691; -981.2887166533023 -2939.6782781526754; 313.30087123532877 1065.5981029180723; -186.60227446859759 -688.353296378139; 324.34628373045786 1327.9854430687467; 430.8197010892654 1887.863404915401; 262.77277766267576 1210.113361381432] +end + +@testset "GLM: Gamma with IdentityLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), wts=clotting) + @test deviance(model) ≈ 1.3435348802929383 + @test loglikelihood(model) ≈ -101.19916126647321 + @test coef(model) ≈ + @test stderror(model) ≈ [16.07962739541372, 3.766841480457265] + @test aic(model) ≈ 208.39832253294642 + @test bic(model) ≈ 208.9899962649551 + @test momentmatrix(model) ≈ [0.26061914480947884 0.4194503323625281; 0.06148544891860896 0.14157547811603585; 
-0.019061929106842457 -0.051620660951180786; -0.1795782998461795 -0.5379685084791557; -0.1764962075232437 -0.6002984389013568; -0.2277661940139623 -0.8402020334398342; -0.3204523427685144 -1.3120423070655995; -0.054878647210950426 -0.2404796937532563; 0.6561290267416002 3.0215858321118008] +end + +@testset "GLM: Gamma with LogLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), wts=clotting) + @test deviance(model) ≈ 0.41206342934199663 + @test loglikelihood(model) ≈ -81.79777246247532 + @test coef(model) ≈ + @test stderror(model) ≈ [0.20287310816341905, 0.053062600599660774] + @test aic(model) ≈ 169.59554492495064 + @test bic(model) ≈ 170.18721865695932 + @test momentmatrix(model) ≈ [14.39716447431257 23.171342336508012; 0.0374983950207553 0.0863432453859933; -2.5490869750808054 -6.903055495494598; -12.821435846444906 -38.40958915849704; -8.713283462827741 -29.635596899449876; -6.520303896525519 -24.05261507847203; -4.123729229896082 -16.88396834850135; 3.70269025008355 16.225287295813413; 16.590486289982852 76.40201283367323] +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=clotting) + @test deviance(model) ≈ 0.03933389380881642 + @test loglikelihood(model) ≈ -43.359078787690514 + @test coef(model) ≈ + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] + @test aic(model) ≈ 92.71815757538103 + @test bic(model) ≈ 93.30983130738969 + @test momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; -1643.317155973023 -3783.877586404854; -420.13783432322964 -1137.7543467296691; -981.2887166533023 -2939.6782781526754; 313.30087123532877 1065.5981029180723; -186.60227446859759 -688.353296378139; 324.34628373045786 1327.9854430687467; 430.8197010892654 1887.863404915401; 262.77277766267576 1210.113361381432] +end + +@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights" begin 
+ model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), wts=clotting) + @test deviance(model) ≈ 0.021377370485120707 + @test loglikelihood(model) ≈ -86.82546665077861 + @test coef(model) ≈ + @test stderror(model) ≈ [0.00016779409928094252, 9.025235597677238e-5] + @test aic(model) ≈ 179.65093330155722 + @test bic(model) ≈ 180.2426070335659 + @test momentmatrix(model) ≈ [28815.030725087538 46376.00289690935; -21039.070620903 -48444.250382140235; -6195.618377983015 -16778.045594449453; -15686.073415243622 -46991.276375382586; -1716.0787284468345 -5836.722477919495; -2086.203482054124 -7695.75316205041; 3418.087237993986 13994.826896081435; 6065.271775021221 26578.18246467872; 8424.676595366931 38797.069483575455] +end + +@testset "GLM: with LogLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, (), LogLink(), wts=quine) + @test deviance(model) ≈ 624.7631999565588 + @test loglikelihood(model) ≈ -2004.5939464322778 + @test coef(model) ≈ + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] + @test aic(model) ≈ 4023.1878928645556 + @test bic(model) ≈ 4044.073139216514 + @test momentmatrix(model) ≈ [-3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063; -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667; -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0; -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0; -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -3.813241073891047 -0.0 -3.813241073891047 
-3.813241073891047 -0.0 -0.0 -3.813241073891047; -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045; -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0; 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0; -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331; -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847; 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412; 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628; -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0; 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0; 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0; 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0; -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0; 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0; 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0; 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0; -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677; -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0; 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0; -3.7634043246260425 -0.0 -0.0 
-3.7634043246260425 -0.0 -0.0 -3.7634043246260425; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485; -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047; 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347; 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183; 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826; 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137; 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0; 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0; 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0; -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554; -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471; -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402; -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927; 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548; 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142; 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032; 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504; -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0; -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0; -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0; -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0; -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0; -4.3190736545666875 -0.0 -0.0 
-0.0 -0.0 -4.3190736545666875 -0.0; -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0; -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0; 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0; 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0; -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597; -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244; 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0; -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0; -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0; -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217; 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873; -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0; -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0; 
0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936; 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854; 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725; -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0; -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0; -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0; -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0; -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0; 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0; 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0; -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0; 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0; -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0; -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0; 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0; 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0; 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0; 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876; -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0; -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0; 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0; 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; 
-2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116; -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487; 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307; -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0; 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0; 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0; -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601; -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536; -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715; -3.6199500530041027 
-3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027; -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088; -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149; -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0; -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0; -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0; -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0; -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0] +end + +@testset "GLM: with LogLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, (), LogLink(), wts=quine) + @test deviance(model) ≈ 624.7631999565588 + @test loglikelihood(model) ≈ -2004.5939464322778 + @test coef(model) ≈ + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] + @test aic(model) ≈ 4023.1878928645556 + @test bic(model) ≈ 4044.073139216514 + @test momentmatrix(model) ≈ [-3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063; -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667; -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -4.102299119258251 -0.0 
-4.102299119258251 -0.0 -0.0 -0.0 -0.0; -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0; -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0; -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045; -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0; 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0; -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331; -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847; 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412; 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628; -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0; 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0; 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0; 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0; -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0; 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0; 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 
1.2423198758199778 0.0; 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0; -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677; -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0; 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0; -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485; -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047; 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347; 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183; 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826; 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137; 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0; 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0; 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0; -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554; -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471; -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402; -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927; 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548; 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142; 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032; 15.787943104351504 0.0 0.0 0.0 
15.787943104351504 0.0 15.787943104351504; -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0; -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0; -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0; -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0; -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0; -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0; -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0; -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0; 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0; 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0; -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597; -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244; 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0; -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0; -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0; -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; 
-0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217; 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873; -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0; -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0; 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936; 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854; 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725; -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0; -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0; -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0; -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0; -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0; 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0; 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0; -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0; 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0; -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0; -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0; 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0; 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0; 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0; 
2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876; -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0; -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0; 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0; 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116; -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487; 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307; -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0; 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0; 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0; -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601; -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 
-3.9564518053768536; -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715; -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027; -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088; -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149; -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0; -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0; -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0; -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0; -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0] +end + +@testset "GLM: with SqrtLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, (), SqrtLink(), wts=quine) + @test deviance(model) ≈ 626.6464732988984 + @test loglikelihood(model) ≈ -2005.5355831034462 + @test coef(model) ≈ + @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, 0.3235019638755972] + @test 
aic(model) ≈ 4025.0711662068925 + @test bic(model) ≈ 4045.956412558851 + @test momentmatrix(model) ≈ [-1.4294351675636041 -0.0 -1.4294351675636041 -0.0 -0.0 -0.0 -1.4294351675636041; -1.5410055711037194 -0.0 -1.5410055711037194 -0.0 -0.0 -0.0 -1.5410055711037194; -1.3571249039047424 -0.0 -1.3571249039047424 -0.0 -0.0 -0.0 -1.3571249039047424; -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0; -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0; -1.229734152157926 -0.0 -1.229734152157926 -0.0 -0.0 -0.0 -0.0; -0.3742348640443611 -0.0 -0.3742348640443611 -0.0 -0.0 -0.0 -0.0; -0.09370480172054219 -0.0 -0.09370480172054219 -0.0 -0.0 -0.0 -0.0; -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827; -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827; -0.6748210571645206 -0.0 -0.6748210571645206 -0.6748210571645206 -0.0 -0.0 -0.6748210571645206; -1.5016227445218024 -0.0 -1.5016227445218024 -1.5016227445218024 -0.0 -0.0 -0.0; -0.058778966482651636 -0.0 -0.058778966482651636 -0.058778966482651636 -0.0 -0.0 -0.0; -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288; 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255; 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344; 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942; -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -0.0; -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0; -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0; -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.0; 2.212861910664014 0.0 2.212861910664014 0.0 2.212861910664014 0.0 0.0; 2.6862849076558937 0.0 2.6862849076558937 0.0 2.6862849076558937 0.0 0.0; 3.1694781034873523 0.0 
3.1694781034873523 0.0 3.1694781034873523 0.0 0.0; -1.6534192741665588 -0.0 -1.6534192741665588 -0.0 -0.0 -1.6534192741665588 -0.0; -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0; -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0; -0.23123674216762394 -0.0 -0.23123674216762394 -0.0 -0.0 -0.23123674216762394 -0.0; 0.3871584524726257 0.0 0.3871584524726257 0.0 0.0 0.3871584524726257 0.0; 0.6036586921589513 0.0 0.6036586921589513 0.0 0.0 0.6036586921589513 0.0; 0.8246522973739006 0.0 0.8246522973739006 0.0 0.0 0.8246522973739006 0.0; -1.560441651521342 -0.0 -0.0 -0.0 -0.0 -0.0 -1.560441651521342; -1.7419685003857353 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; -1.4153955925789807 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; 0.23770439864218734 0.0 0.0 0.0 0.0 0.0 0.0; 3.853247675936175 0.0 0.0 0.0 0.0 0.0 0.0; -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 -0.0 -0.0 -1.7692672149493731; -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218; -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218; -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 -0.0 -0.0 -1.4769837741682987; -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 -0.0 -0.0 -0.9582774689727417; 0.8052632685284861 0.0 0.0 0.8052632685284861 0.0 0.0 0.8052632685284861; 1.2077994352773953 0.0 0.0 1.2077994352773953 0.0 0.0 1.2077994352773953; 2.7042310768987665 0.0 0.0 2.7042310768987665 0.0 0.0 2.7042310768987665; 7.744950633464035 0.0 0.0 7.744950633464035 0.0 0.0 7.744950633464035; 7.999933021719232 0.0 0.0 7.999933021719232 0.0 0.0 7.999933021719232; -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0; -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0; -0.7262212443523531 -0.0 -0.0 -0.7262212443523531 -0.0 -0.0 -0.0; 0.7835691668207807 0.0 0.0 0.7835691668207807 0.0 0.0 0.0; 1.3489349925379315 0.0 0.0 1.3489349925379315 0.0 0.0 0.0; -1.6522100272913283 -0.0 -0.0 -0.0 -1.6522100272913283 -0.0 
-1.6522100272913283; -1.4590418232851277 -0.0 -0.0 -0.0 -1.4590418232851277 -0.0 -1.4590418232851277; -1.4015111702500997 -0.0 -0.0 -0.0 -1.4015111702500997 -0.0 -1.4015111702500997; -0.9738202253475602 -0.0 -0.0 -0.0 -0.9738202253475602 -0.0 -0.9738202253475602; 1.8091899079230156 0.0 0.0 0.0 1.8091899079230156 0.0 1.8091899079230156; 1.9274245415701026 0.0 0.0 0.0 1.9274245415701026 0.0 1.9274245415701026; 3.399094699981504 0.0 0.0 0.0 3.399094699981504 0.0 3.399094699981504; 6.157344170373497 0.0 0.0 0.0 6.157344170373497 0.0 6.157344170373497; -1.5082203700488148 -0.0 -0.0 -0.0 -1.5082203700488148 -0.0 -0.0; -0.7518968254083281 -0.0 -0.0 -0.0 -0.0 -0.7518968254083281 -0.0; -1.403623374340758 -0.0 -0.0 -0.0 -0.0 -1.403623374340758 -0.0; -1.5307566638052945 -0.0 -0.0 -0.0 -0.0 -1.5307566638052945 -0.0; -1.6487615285777935 -0.0 -0.0 -0.0 -0.0 -1.6487615285777935 -0.0; -1.5960112869101046 -0.0 -0.0 -0.0 -0.0 -1.5960112869101046 -0.0; -1.3904968459197917 -0.0 -0.0 -0.0 -0.0 -1.3904968459197917 -0.0; -0.8618818687491527 -0.0 -0.0 -0.0 -0.0 -0.8618818687491527 -0.0; 0.6414580291618693 0.0 0.0 0.0 0.0 0.6414580291618693 0.0; 1.0942097094869556 0.0 0.0 0.0 0.0 1.0942097094869556 0.0; -1.7282583231217719 -1.7282583231217719 -1.7282583231217719 -0.0 -0.0 -0.0 -1.7282583231217719; -0.31744768418403196 -0.31744768418403196 -0.31744768418403196 -0.0 -0.0 -0.0 -0.31744768418403196; 11.375280355235768 11.375280355235768 11.375280355235768 0.0 0.0 0.0 11.375280355235768; -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0; -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0; -1.7598032161223125 -1.7598032161223125 -1.7598032161223125 -0.0 -0.0 -0.0 -0.0; -1.491030768800334 -1.491030768800334 -1.491030768800334 -0.0 -0.0 -0.0 -0.0; -0.7301769140610584 -0.7301769140610584 -0.7301769140610584 -0.0 -0.0 -0.0 -0.0; -0.5034045168716083 -0.5034045168716083 -0.5034045168716083 -0.0 -0.0 -0.0 -0.0; -1.113506915630734 
-1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734; -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734; -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545; -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545; -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545; -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.0 -0.0 -0.07026189294137537; 2.0844355685058127 2.0844355685058127 2.0844355685058127 2.0844355685058127 0.0 0.0 2.0844355685058127; -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -0.0 -0.0 -0.0; -1.480745232754872 -1.480745232754872 -1.480745232754872 -1.480745232754872 -0.0 -0.0 -0.0; 0.21539031393949493 0.21539031393949493 0.21539031393949493 0.0 0.21539031393949493 0.0 0.21539031393949493; 1.6360787089859707 1.6360787089859707 1.6360787089859707 0.0 1.6360787089859707 0.0 1.6360787089859707; 2.7952193074887086 2.7952193074887086 2.7952193074887086 0.0 2.7952193074887086 0.0 2.7952193074887086; -1.448364418208364 -1.448364418208364 -1.448364418208364 -0.0 -1.448364418208364 -0.0 -0.0; -0.9833503482488964 -0.9833503482488964 -0.9833503482488964 -0.0 -0.9833503482488964 -0.0 -0.0; -1.5017276161539084 -1.5017276161539084 -1.5017276161539084 -0.0 -1.5017276161539084 -0.0 -0.0; -1.7640356839137032 -1.7640356839137032 -1.7640356839137032 -0.0 -1.7640356839137032 -0.0 -0.0; -1.5776069676233444 -1.5776069676233444 -1.5776069676233444 -0.0 -1.5776069676233444 -0.0 -0.0; 0.06361165131312438 0.06361165131312438 0.06361165131312438 0.0 0.06361165131312438 0.0 0.0; 2.8475608847598153 2.8475608847598153 2.8475608847598153 0.0 2.8475608847598153 0.0 0.0; -0.8892460264142052 -0.8892460264142052 -0.8892460264142052 -0.0 -0.0 -0.8892460264142052 
-0.0; 1.7743695974457907 1.7743695974457907 1.7743695974457907 0.0 0.0 1.7743695974457907 0.0; -1.4305200814192562 -1.4305200814192562 -1.4305200814192562 -0.0 -0.0 -1.4305200814192562 -0.0; -0.9478929479399423 -0.9478929479399423 -0.9478929479399423 -0.0 -0.0 -0.9478929479399423 -0.0; 1.2024302930353608 1.2024302930353608 1.2024302930353608 0.0 0.0 1.2024302930353608 0.0; 4.02280289664674 4.02280289664674 4.02280289664674 0.0 0.0 4.02280289664674 0.0; 10.440933185941839 10.440933185941839 10.440933185941839 0.0 0.0 10.440933185941839 0.0; 1.262517093518885 1.262517093518885 0.0 0.0 0.0 0.0 1.262517093518885; -0.9176184029771589 -0.9176184029771589 -0.0 -0.0 -0.0 -0.0 -0.0; -0.6982138187318754 -0.6982138187318754 -0.0 -0.0 -0.0 -0.0 -0.0; 1.7133696015602422 1.7133696015602422 0.0 0.0 0.0 0.0 0.0; 5.976953806399672 5.976953806399672 0.0 0.0 0.0 0.0 0.0; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271; -1.1194589330024307 -1.1194589330024307 -0.0 -1.1194589330024307 -0.0 -0.0 -1.1194589330024307; -1.6605118926433484 -1.6605118926433484 -0.0 -1.6605118926433484 -0.0 -0.0 -1.6605118926433484; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271; -0.016084003453250676 -0.016084003453250676 -0.0 -0.016084003453250676 -0.0 -0.0 -0.016084003453250676; 1.4107278812149031 1.4107278812149031 0.0 1.4107278812149031 0.0 0.0 1.4107278812149031; -1.1128985115655265 -1.1128985115655265 -0.0 -1.1128985115655265 -0.0 -0.0 
-0.0; 3.7957001151581404 3.7957001151581404 0.0 3.7957001151581404 0.0 0.0 0.0; -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0; -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0; -0.2475403067755282 -0.2475403067755282 -0.0 -0.2475403067755282 -0.0 -0.0 -0.0; 14.054845699928913 14.054845699928913 0.0 14.054845699928913 0.0 0.0 0.0; -0.8850373634971601 -0.8850373634971601 -0.0 -0.0 -0.8850373634971601 -0.0 -0.8850373634971601; -1.7594068536637126 -1.7594068536637126 -0.0 -0.0 -1.7594068536637126 -0.0 -1.7594068536637126; -0.9681259531090506 -0.9681259531090506 -0.0 -0.0 -0.9681259531090506 -0.0 -0.9681259531090506; -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888; -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888; -1.7082890535667876 -1.7082890535667876 -0.0 -0.0 -1.7082890535667876 -0.0 -1.7082890535667876; -1.6168827210404924 -1.6168827210404924 -0.0 -0.0 -1.6168827210404924 -0.0 -1.6168827210404924; -1.4399676449006795 -1.4399676449006795 -0.0 -0.0 -1.4399676449006795 -0.0 -1.4399676449006795; -1.2202487676722908 -1.2202487676722908 -0.0 -0.0 -1.2202487676722908 -0.0 -1.2202487676722908; -1.5079358693315765 -1.5079358693315765 -0.0 -0.0 -1.5079358693315765 -0.0 -0.0; -1.3842064467607202 -1.3842064467607202 -0.0 -0.0 -0.0 -1.3842064467607202 -0.0; -1.5208922216041325 -1.5208922216041325 -0.0 -0.0 -0.0 -1.5208922216041325 -0.0; 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0; -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0; -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0; -1.76269912849327 -1.76269912849327 -0.0 -0.0 -0.0 -1.76269912849327 -0.0; -0.7863622513628796 -0.7863622513628796 -0.0 -0.0 -0.0 -0.7863622513628796 -0.0; -0.32795262618891574 -0.32795262618891574 -0.0 -0.0 -0.0 -0.32795262618891574 -0.0; 0.3453894161447818 
0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0; 3.2758115948278728 3.2758115948278728 0.0 0.0 0.0 3.2758115948278728 0.0] +end + diff --git a/test/runtests.jl b/test/runtests.jl index 7568c223..a1cc0183 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -81,8 +81,6 @@ end t_lm_colli_b = lm(@formula(Y ~ XC), st_df, dropcollinear=true) @test isapprox(cooksdistance(t_lm_colli), cooksdistance(t_lm_colli_b)) - - end @testset "linear model with weights" begin @@ -110,8 +108,8 @@ end @test isa(weights(lm_model), FrequencyWeights) @test isa(weights(glm_model), FrequencyWeights) - lm_model = lm(f, df, wts = iweights(df.weights)) - glm_model = glm(f, df, Normal(), wts = iweights(df.weights)) + lm_model = lm(f, df, wts = aweights(df.weights)) + glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) @@ -193,10 +191,10 @@ end @testset "Passing wts (depwarn)" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3]) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " * - "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * "by coercing `wts` to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts) @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " * - "`ImportanceWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. 
Proceeding " * "by coercing `wts` to `FrequencyWeights`") glm(@formula(y~x), df, Normal(), IdentityLink(); wts=df.wts) end @@ -610,10 +608,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], end -@testset "Aggregated Binomial LogitLink (ImportanceWeights)" begin +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=iweights(admit_agr.count)) + wts=aweights(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 8 @test isapprox(deviance(gm14), 474.9667184280627) @@ -908,10 +906,10 @@ end @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) end - gmsparsev = [fit(LinearModel, X, y; wts=iweights(wts)), - fit(LinearModel, X, sparse(y); wts=iweights(wts)), - fit(LinearModel, Matrix(X), sparse(y); wts=iweights(wts))] - gmdense = fit(LinearModel, Matrix(X), y; wts=iweights(wts)) + gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), + fit(LinearModel, X, sparse(y); wts=aweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) for gmsparse in gmsparsev @test isapprox(deviance(gmsparse), deviance(gmdense)) @@ -1619,7 +1617,7 @@ end 0.5 0.0 0.0 0.5] @test mm0_bin ≈ GLM.momentmatrix(gm_bin) - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=iweights(admit_agr.count)) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=aweights(admit_agr.count)) mm0_binw = [-15.1475 -0.0 -0.0 -0.0 -34.6887 -34.6887 -0.0 -0.0 -21.5207 -0.0 -21.5207 -0.0 @@ -1654,4 +1652,60 @@ end stata_se = [.25693835, .30796933, .33538667, .4093073] @test stderror(gm_binw) ≈ stata_se atol = 0.001 end -end \ No newline at end of file + + @testset "Binomial ProbitLink" begin + f = @formula(admit ~ 1 + rank) + + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink()) + 
mm0_bin = [-0.7978846 0.0000000 0.0000000 0.0000000 + -0.7978846 -0.7978846 0.0000000 0.0000000 + -0.7978846 0.0000000 -0.7978846 0.0000000 + -0.7978846 0.0000000 0.0000000 -0.7978846 + 0.7978846 0.0000000 0.0000000 0.0000000 + 0.7978846 0.7978846 0.0000000 0.0000000 + 0.7978846 0.0000000 0.7978846 0.0000000 + 0.7978846 0.0000000 0.0000000 0.7978846] + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) rtol=1e-06 + + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), wts=aweights(admit_agr.count)) + + mm0_binw = [ -24.20695 0.00000 0.00000 0.00000 + -56.36158 -56.36158 0.00000 0.00000 + -36.86681 0.00000 -36.86681 0.00000 + -17.52584 0.00000 0.00000 -17.52584 + 24.20695 0.00000 0.00000 0.00000 + 56.36158 56.36158 0.00000 0.00000 + 36.86681 0.00000 36.86681 0.00000 + 17.52584 0.00000 0.00000 17.52584] + + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 + + Vcov =[ 0.02585008 -0.02585008 -0.02585008 -0.02585008 + -0.02585008 0.03677007 0.02585008 0.02585008 + -0.02585008 0.02585008 0.04168393 0.02585008 + -0.02585008 0.02585008 0.02585008 0.05792112] + + ## This is due to divverences between chol and qr + @test vcov(gm_binw) ≈ Vcov rtol=1e-06 + + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), wts=pweights(admit_agr.count)) + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 + ## This are obtained from stata + ## glm admit i.rank [pweight=count], family(binomial) irls + #coef_stata = [] + #@test coef(gm_binw) ≈ coef_stata rtol=1e-05 + ## Stata: uses different residuals degrees of freedom. 
In this case (n-1) instead of (n-4) + ## Also need to give low tolerance (this small differences seem to be due to QR vs Cholesky) + #@test stderror(gm_binw)*sqrt(5/7) ≈ [] atol=1e-02 + + ## Stata is also off with fweights + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=fweights(admit_agr.count)) + ## vs Stata (here stata uses the same df) + stata_se = [.25693835, .30796933, .33538667, .4093073] + @test stderror(gm_binw) ≈ stata_se rtol = 1e-03 + end + + +end + +include("analytic_weights.jl") \ No newline at end of file From 3fe045ac8a6cdf6f18addd6beffdad06fa770a6f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 24 Oct 2022 23:07:44 +0200 Subject: [PATCH 064/106] Add extensive tests for AnalyticWeights --- test/analytic_weights.jl | 738 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 738 insertions(+) create mode 100644 test/analytic_weights.jl diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl new file mode 100644 index 00000000..fe969c86 --- /dev/null +++ b/test/analytic_weights.jl @@ -0,0 +1,738 @@ +rng = StableRNG(123) + +x1 = rand(rng, 25) +x2 = ifelse.(randn(rng, 25) .> 0, 1, 0) + +y = ifelse.(0.004 .- 0.01 .* x1 .+ 1.5 .* x2 .+ randn(rng, 25) .> 0, 1, 0) +w = rand(rng, 25) * 6 +w = floor.(w) .+ 1 + + +df = DataFrame(y=y, x1=x1, x2=x2, w=w) + +clotting = DataFrame(u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), + lot1=[118, 58, 42, 35, 27, 25, 21, 19, 18], + w=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7]) + +quine.aweights = log.(3 .+ 3 .* quine.Days) +quine.pweights = 1.0 ./ (quine.aweights ./ sum(quine.aweights)) +quine.fweights = floor.(quine.aweights) + +dobson = DataFrame(Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome=categorical(repeat(string.('A':'C'), outer=3)), + Treatment=categorical(repeat(string.('a':'c'), inner=3)), + w=[1, 2, 1, 2, 3, 4, 3, 2, 1] +) + + +@testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), 
LogitLink(), wts=aweights(df.w), atol=1e-08,rtol=1e-08) + @test deviance(model) ≈ 39.58120350785813 rtol = 1e-06 + @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 + @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 + @test stderror(model) ≈ [0.9021013750843575, 2.063002891039618, 2337.217357530545] rtol = 1e-07 + @test aic(model) ≈ 45.58120350785812 rtol = 1e-07 + @test bic(model) ≈ 49.237830982462725 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1.095702695280752 0.1983501744547734 0.0; + 0.6292210259386299 0.2312324009639611 0.0; + -0.869357286789858 -0.5816508224007081 -0.0; + 0.3287258318630744 0.0140466407925222 0.0; + 2.08484144507e-8 9.1314170785019e-9 2.085e-8; + 0.5274699949608909 0.2549210423027525 0.0; + -0.8883810924640739 -0.678699816121261 -0.0; + 0.169878909537636 0.15705018022658265 0.0; + 0.3346238606654708 0.1723463870596023 0.0; + 0.17263445723951362 0.085461258206114 0.0; + 1.90303118479143e-8 1.3346718776025414e-8 1.90e-8; + 1.60927355387381e-8 1.1161427644959331e-8 1.61e-8; + 2.2803477805569965e-9 1.9982067289774133e-9 2.28e-9; + 6.018177387787048e-9 5.682479294453922e-9 6.02e-9; + 1.4765287153134531e-8 1.0914686191045351e-8 1.48e-8; + 2.0721163707495327e-8 1.1594416669570361e-8 2.07e-8; + 0.8473483434776684 0.4295002258959193 0.0; + 0.5426271690329769 0.35055009398999054 0.0; + -4.541414638754089 -1.2097095921684677 -0.0; + 1.2385948537889822 0.31353946330992544 0.0; + 3.067144219729067e-8 1.078462894246146e-8 3.07e-8; + 1.7613428517425065e-8 9.289130102493344e-9 1.76e-8; + 1.8334365002191494e-8 1.3220774075528698e-8 1.83e-8; + 1.6687848932140258e-8 3.1458514759844027e-9 1.67e-8; + 0.4123258762224241 0.2630623634882926 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with ProbitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), wts=aweights(df.w), rtol=1e-09) + @test deviance(model) ≈ 39.595360462143866 rtol = 1e-06 + @test 
loglikelihood(model) ≈ -19.797680231071933 rtol = 1e-06 + @test coef(model) ≈ [0.42120722997197313, 1.0416447141541567, 4.916910225354065] rtol = 1e-07 + @test stderror(model) ≈ [0.5216506352923727, 1.1455457218079563, 325.2782732702344] rtol = 1e-07 + @test aic(model) ≈ 45.595360462143866 rtol = 1e-07 + @test bic(model) ≈ 49.25198793674846 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1.8176341588673794 0.32903820904987535 0.0; + 1.0975399310212473 0.40333489019264895 0.0; + -1.6205390909958874 -1.0842353418245372 -0.0; + 0.5269343763678408 0.02251620404797942 0.0; + 2.1411260807372573e-7 9.377938695085409e-8 2.14e-7; + 0.9490575664910063 0.45867015444762754 0.0; + -1.7015508605205314 -1.2999401562600912 -0.0; + 0.33388087355781 0.3086672236664311 0.0; + 0.6070565699014323 0.31266152495891864 0.0; + 0.3115689943654785 0.15423965008066182 0.0; + 6.62696579317951e-8 4.647756142241091e-8 6.63e-8; + 5.7919749595281985e-8 4.0171361342870654e-8 5.79e-8; + 3.7134640590626956e-9 3.2540075395089014e-9 3.71e-9; + 7.229998935730756e-9 6.826704599068171e-9 7.23e-9; + 4.373923181354305e-8 3.233259092972413e-8 4.37e-8; + 1.3039185369174096e-7 7.296006649823636e-8 1.30e-7; + 1.5339832954887218 0.7775387501543354 0.0; + 1.016200760890786 0.6564899300523522 0.0; + -7.736929921295398 -2.060908127581581 -0.0; + 2.0943898311662204 0.5301764831468441 0.0; + 4.4180333118496e-7 1.553459717259102e-7 4.42e-7; + 1.2636718773015955e-7 6.664467660855266e-8 1.26e-7; + 5.869289625690482e-8 4.232301043195289e-8 5.86e-8; + 4.453972209837739e-7 8.396249934481249e-8 4.45e-7; + 0.7707735136764122 0.49175061259680825 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with CauchitLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=aweights(df.w), rtol=1e-08, atol=1e-08) + @test deviance(model) ≈ 39.627559015619845 rtol = 1e-07 + @test loglikelihood(model) ≈ -19.813779507809922 rtol = 1e-07 + + + @test aic(model) ≈ 45.627559015619845 rtol = 
1e-07 + @test bic(model) ≈ 49.28418649022444 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 1.003054020887253 0.1815783979426737 0.0; + 0.4264622162277366 0.15672057689370572 0.0; + -0.41221991029044563 -0.27579920646405165 -0.0; + 0.39009720364187195 0.016669074233287458 0.0; + 0.0 0.0 0.0; + 0.311923278855423 0.15074944190941555 0.0; + -0.37849361968132017 -0.28915918208960645 -0.0; + 0.08167834727345773 0.07551025135974303 0.0; + 0.1919243490466221 0.0988496997230555 0.0; + 0.10090666946769812 0.049953010957329104 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.4897032828746528 0.2482186602895976 0.0; + 0.28232074585439737 0.18238593576314036 0.0; + -3.7015013060867705 -0.985979478109429 -0.0; + 0.9986020018483154 0.25278737010890295 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 4.7109001281144596e-17 0.0; + 0.21554272008110664 0.1375154474822352 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with CloglogLink link - AnalyticWeights" begin + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts=aweights(df.w), rtol=5e-10, atol=1e-10) + @test deviance(model) ≈ 39.61484762863061 rtol = 1e-07 + @test loglikelihood(model) ≈ -19.807423814315307 rtol = 1e-07 + # @test coef(model) ≈ [0.12095167614339054, 0.8666201161364425, 2.5534670172943965] rtol=1e-07 + # @test stderror(model) ≈ [0.46442064138194333, 0.9661962332997427, 116.7042677626327] rtol=1e-07 + @test aic(model) ≈ 45.61484762863061 rtol = 1e-07 + @test bic(model) ≈ 49.27147510323522 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 1.9242952153533148 0.3483465846271526 0.0; + 1.2514530854268051 0.45989642702311906 0.0; + -2.0153062620933504 -1.348357645985017 -0.0; + 0.5261952160086218 0.02248461930760421 0.0; + 1.5917320997016487e-7 6.971642718424943e-8 1.5e-7; + 1.1286597324859617 0.5454701085543264 0.0; + -2.188084897309478 -1.6716393787069568 -0.0; + 0.44263603677181956 0.4092095336554625 0.0; + 0.7298024393361812 
0.3758811862272388 0.0; + 0.37203439025955143 0.1841725435114846 0.0; + 1.79124103038821e-9 1.2562689715087133e-9 0.0; + 1.7546756596715808e-9 1.216989204144432e-9 0.0; + 5.7157964501561416e-12 5.00859694540867e-12 0.0; + 3.1204806380028555e-12 2.9464180717205265e-12 0.0; + 6.677005273849182e-10 4.93572637661486e-10 6.7e-10; + 2.403535247525871e-8 1.3448853323683519e-8 2.4e-8; + 1.839078160139044 0.9321839020515984 0.0; + 1.2724386238625014 0.8220257013418844 0.0; + -8.529708800751662 -2.2720829026754306 -0.0; + 2.2835873542705203 0.5780701827470168 0.0; + 7.796454326518419e-7 2.7413731130574843e-7 7.79e-7; + 3.441301868580375e-8 1.8149050735676883e-8 3.44e-8; + 1.181434863206281e-9 8.519238822580661e-10 1.18e-9; + 3.115862778487023e-6 5.873759740112369e-7 3.11e-6; + 0.9629196988432743 0.6143391585021523 0.0] rtol = 1e-05 +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=aweights(clotting.w), atol=1e-07, rtol=1e-08) + @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 + @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 + @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 1e-04 + @test aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; + -1643.317155973023 -3783.877586404854; + -420.13783432322964 -1137.7543467296691; + -981.2887166533023 -2939.6782781526754; + 313.30087123532877 1065.5981029180723; + -186.60227446859759 -688.353296378139; + 324.34628373045786 1327.9854430687467; + 430.8197010892654 1887.863404915401; + 262.77277766267576 1210.113361381432] rtol = 1e-07 +end + +@testset "GLM: Gamma with IdentityLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), 
wts=aweights(clotting.w), rtol=1e-16, atol=1e-16, minstepfac=0.00001) + @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 + @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 + @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 + @test stderror(model) ≈ [16.07962739541372, 3.766841480457265] rtol = 1e-05 + @test aic(model) ≈ 208.39832253294642 rtol = 1e-07 + @test bic(model) ≈ 208.9899962649551 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 0.26061914480947884 0.4194503323625281; + 0.06148544891860896 0.14157547811603585; + -0.019061929106842457 -0.051620660951180786; + -0.1795782998461795 -0.5379685084791557; + -0.1764962075232437 -0.6002984389013568; + -0.2277661940139623 -0.8402020334398342; + -0.3204523427685144 -1.3120423070655995; + -0.054878647210950426 -0.2404796937532563; + 0.6561290267416002 3.0215858321118008] rtol = 1e-04 +end + +@testset "GLM: Gamma with LogLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + @test deviance(model) ≈ 0.41206342934199663 rtol = 1e-07 + @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 + @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 + @test stderror(model) ≈ [0.20287310816341905, 0.053062600599660774] rtol = 1e-07 + @test aic(model) ≈ 169.59554492495064 rtol = 1e-07 + @test bic(model) ≈ 170.18721865695932 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 14.39716447431257 23.171342336508012; + 0.0374983950207553 0.0863432453859933; + -2.5490869750808054 -6.903055495494598; + -12.821435846444906 -38.40958915849704; + -8.713283462827741 -29.635596899449876; + -6.520303896525519 -24.05261507847203; + -4.123729229896082 -16.88396834850135; + 3.70269025008355 16.225287295813413; + 16.590486289982852 76.40201283367323] rtol = 1e-07 +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), 
clotting, Gamma(), InverseLink(), wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 + @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 + @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 1e-07 + @test aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 1900.1063511093867 3058.103199132267; + -1643.317155973023 -3783.877586404854; + -420.13783432322964 -1137.7543467296691; + -981.2887166533023 -2939.6782781526754; + 313.30087123532877 1065.5981029180723; + -186.60227446859759 -688.353296378139; + 324.34628373045786 1327.9854430687467; + 430.8197010892654 1887.863404915401; + 262.77277766267576 1210.113361381432] rtol = 1e-07 +end + +@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights" begin + model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + @test deviance(model) ≈ 0.021377370485120707 rtol = 1e-07 + @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 + @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 + @test stderror(model) ≈ [0.00016779409928094252, 9.025235597677238e-5] rtol = 1e-07 + @test aic(model) ≈ 179.65093330155722 rtol = 1e-07 + @test bic(model) ≈ 180.2426070335659 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 28815.030725087538 46376.00289690935; + -21039.070620903 -48444.250382140235; + -6195.618377983015 -16778.045594449453; + -15686.073415243622 -46991.276375382586; + -1716.0787284468345 -5836.722477919495; + -2086.203482054124 -7695.75316205041; + 3418.087237993986 13994.826896081435; + 6065.271775021221 26578.18246467872; + 8424.676595366931 38797.069483575455] rtol = 1e-06 +end + +@testset "GLM: with LogLink link - AnalyticWeights" begin 
+ model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), atol=1e-08, rtol=1e-08) + + + @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 + @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] rtol = 1e-07 + ## Tests below are broken because dof(model)==8 instead of 7 + @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ + -3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 + -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 + -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0 + -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0 + -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045 + -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0 + 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0 + -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 + -0.6279394841753847 -0.0 -0.6279394841753847 
-0.0 -0.6279394841753847 -0.0 -0.6279394841753847 + 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 + 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 + -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0 + 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0 + 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0 + 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0 + -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0 + 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0 + 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0 + 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0 + -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677 + -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0 + 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0 + -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 + -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047 + 2.106807550276347 0.0 0.0 
2.106807550276347 0.0 0.0 2.106807550276347 + 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183 + 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826 + 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137 + 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0 + 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0 + 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0 + -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554 + -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471 + -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402 + -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927 + 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548 + 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142 + 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032 + 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504 + -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0 + -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0 + -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0 + -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0 + -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0 + -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0 + -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0 + -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0 + 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0 + 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0 + -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 
-3.8426774654770597 + -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244 + 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0 + -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0 + -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0 + -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217 + 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873 + -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0 + -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0 + 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936 + 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854 + 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725 + -2.7839804243079254 
-2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0 + -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0 + -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0 + -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0 + -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0 + 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0 + 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0 + -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0 + 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0 + -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0 + -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0 + 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0 + 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0 + 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0 + 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876 + -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0 + 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0 + 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116 + -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331 + -2.750339462985501 
-2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487 + 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307 + -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0 + 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0 + 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0 + -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601 + -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536 + -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715 + -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027 + -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088 + -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149 + -2.921923505820458 
-2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0 + -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0 + -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0 + -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0 + -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0 + ] rtol = 1e-04 +end + +@testset "GLM: with LogLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-08) + @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 + @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] rtol = 1e-07 + @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ + -3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 + -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 + -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882 + -4.102299119258251 -0.0 
-4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0 + -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0 + -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045 + -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0 + 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0 + -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 + -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 + 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 + 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 + -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0 + 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0 + 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0 + 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0 + -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0 + 0.6731546773300909 0.0 0.6731546773300909 
0.0 0.0 0.6731546773300909 0.0 + 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0 + 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0 + -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677 + -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0 + 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0 + -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 + -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047 + 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347 + 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183 + 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826 + 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137 + 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0 + 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0 + 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0 + -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554 + -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471 + -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402 + -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927 + 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548 + 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142 + 
8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032 + 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504 + -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0 + -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0 + -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0 + -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0 + -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0 + -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0 + -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0 + -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0 + 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0 + 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0 + -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597 + -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244 + 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0 + -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0 + -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0 + -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 
+ -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217 + 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873 + -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0 + -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0 + 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936 + 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854 + 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725 + -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0 + -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0 + -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0 + -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0 + -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0 + 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0 + 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0 + -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0 + 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0 + -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0 + -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0 + 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0 + 9.364366020386104 9.364366020386104 9.364366020386104 
0.0 0.0 9.364366020386104 0.0 + 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0 + 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876 + -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0 + 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0 + 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116 + -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487 + 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307 + -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0 + 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0 + 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0 + -1.999110422648601 -1.999110422648601 
-0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601 + -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536 + -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715 + -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027 + -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088 + -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149 + -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0 + -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0 + -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0 + -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0 + -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0 + ] rtol = 1e-04 +end + +@testset "GLM: with SqrtLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), SqrtLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-09) + @test deviance(model) ≈ 626.6464732988984 rtol = 1e-07 + @test 
loglikelihood(model) ≈ -2005.5355831034462 rtol = 1e-07 + @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, -0.9859743168046422, 0.2132095063819721, 0.7456070470961186, 0.5840284357554036] rtol = 1e-07 + @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, 0.3235019638755972] rtol = 1e-06 + @test_broken aic(model) ≈ 4025.0711662068925 rtol = 1e-07 + @test_broken bic(model) ≈ 4045.956412558851 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ + -1.4294351675636041 -0.0 -1.4294351675636041 -0.0 -0.0 -0.0 -1.4294351675636041 + -1.5410055711037194 -0.0 -1.5410055711037194 -0.0 -0.0 -0.0 -1.5410055711037194 + -1.3571249039047424 -0.0 -1.3571249039047424 -0.0 -0.0 -0.0 -1.3571249039047424 + -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0 + -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0 + -1.229734152157926 -0.0 -1.229734152157926 -0.0 -0.0 -0.0 -0.0 + -0.3742348640443611 -0.0 -0.3742348640443611 -0.0 -0.0 -0.0 -0.0 + -0.09370480172054219 -0.0 -0.09370480172054219 -0.0 -0.0 -0.0 -0.0 + -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827 + -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827 + -0.6748210571645206 -0.0 -0.6748210571645206 -0.6748210571645206 -0.0 -0.0 -0.6748210571645206 + -1.5016227445218024 -0.0 -1.5016227445218024 -1.5016227445218024 -0.0 -0.0 -0.0 + -0.058778966482651636 -0.0 -0.058778966482651636 -0.058778966482651636 -0.0 -0.0 -0.0 + -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 + 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 + 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 + 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 + 
-1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -0.0 + -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0 + -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0 + -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.0 + 2.212861910664014 0.0 2.212861910664014 0.0 2.212861910664014 0.0 0.0 + 2.6862849076558937 0.0 2.6862849076558937 0.0 2.6862849076558937 0.0 0.0 + 3.1694781034873523 0.0 3.1694781034873523 0.0 3.1694781034873523 0.0 0.0 + -1.6534192741665588 -0.0 -1.6534192741665588 -0.0 -0.0 -1.6534192741665588 -0.0 + -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0 + -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0 + -0.23123674216762394 -0.0 -0.23123674216762394 -0.0 -0.0 -0.23123674216762394 -0.0 + 0.3871584524726257 0.0 0.3871584524726257 0.0 0.0 0.3871584524726257 0.0 + 0.6036586921589513 0.0 0.6036586921589513 0.0 0.0 0.6036586921589513 0.0 + 0.8246522973739006 0.0 0.8246522973739006 0.0 0.0 0.8246522973739006 0.0 + -1.560441651521342 -0.0 -0.0 -0.0 -0.0 -0.0 -1.560441651521342 + -1.7419685003857353 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -1.4153955925789807 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 0.23770439864218734 0.0 0.0 0.0 0.0 0.0 0.0 + 3.853247675936175 0.0 0.0 0.0 0.0 0.0 0.0 + -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 + -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 + -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 + -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 + -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 + 0.8052632685284861 0.0 0.0 0.8052632685284861 0.0 0.0 0.8052632685284861 + 1.2077994352773953 0.0 0.0 1.2077994352773953 0.0 0.0 1.2077994352773953 + 2.7042310768987665 0.0 0.0 2.7042310768987665 0.0 0.0 2.7042310768987665 + 
7.744950633464035 0.0 0.0 7.744950633464035 0.0 0.0 7.744950633464035 + 7.999933021719232 0.0 0.0 7.999933021719232 0.0 0.0 7.999933021719232 + -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0 + -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0 + -0.7262212443523531 -0.0 -0.0 -0.7262212443523531 -0.0 -0.0 -0.0 + 0.7835691668207807 0.0 0.0 0.7835691668207807 0.0 0.0 0.0 + 1.3489349925379315 0.0 0.0 1.3489349925379315 0.0 0.0 0.0 + -1.6522100272913283 -0.0 -0.0 -0.0 -1.6522100272913283 -0.0 -1.6522100272913283 + -1.4590418232851277 -0.0 -0.0 -0.0 -1.4590418232851277 -0.0 -1.4590418232851277 + -1.4015111702500997 -0.0 -0.0 -0.0 -1.4015111702500997 -0.0 -1.4015111702500997 + -0.9738202253475602 -0.0 -0.0 -0.0 -0.9738202253475602 -0.0 -0.9738202253475602 + 1.8091899079230156 0.0 0.0 0.0 1.8091899079230156 0.0 1.8091899079230156 + 1.9274245415701026 0.0 0.0 0.0 1.9274245415701026 0.0 1.9274245415701026 + 3.399094699981504 0.0 0.0 0.0 3.399094699981504 0.0 3.399094699981504 + 6.157344170373497 0.0 0.0 0.0 6.157344170373497 0.0 6.157344170373497 + -1.5082203700488148 -0.0 -0.0 -0.0 -1.5082203700488148 -0.0 -0.0 + -0.7518968254083281 -0.0 -0.0 -0.0 -0.0 -0.7518968254083281 -0.0 + -1.403623374340758 -0.0 -0.0 -0.0 -0.0 -1.403623374340758 -0.0 + -1.5307566638052945 -0.0 -0.0 -0.0 -0.0 -1.5307566638052945 -0.0 + -1.6487615285777935 -0.0 -0.0 -0.0 -0.0 -1.6487615285777935 -0.0 + -1.5960112869101046 -0.0 -0.0 -0.0 -0.0 -1.5960112869101046 -0.0 + -1.3904968459197917 -0.0 -0.0 -0.0 -0.0 -1.3904968459197917 -0.0 + -0.8618818687491527 -0.0 -0.0 -0.0 -0.0 -0.8618818687491527 -0.0 + 0.6414580291618693 0.0 0.0 0.0 0.0 0.6414580291618693 0.0 + 1.0942097094869556 0.0 0.0 0.0 0.0 1.0942097094869556 0.0 + -1.7282583231217719 -1.7282583231217719 -1.7282583231217719 -0.0 -0.0 -0.0 -1.7282583231217719 + -0.31744768418403196 -0.31744768418403196 -0.31744768418403196 -0.0 -0.0 -0.0 -0.31744768418403196 + 11.375280355235768 11.375280355235768 11.375280355235768 0.0 
0.0 0.0 11.375280355235768 + -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0 + -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0 + -1.7598032161223125 -1.7598032161223125 -1.7598032161223125 -0.0 -0.0 -0.0 -0.0 + -1.491030768800334 -1.491030768800334 -1.491030768800334 -0.0 -0.0 -0.0 -0.0 + -0.7301769140610584 -0.7301769140610584 -0.7301769140610584 -0.0 -0.0 -0.0 -0.0 + -0.5034045168716083 -0.5034045168716083 -0.5034045168716083 -0.0 -0.0 -0.0 -0.0 + -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734 + -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.0 -0.0 -0.07026189294137537 + 2.0844355685058127 2.0844355685058127 2.0844355685058127 2.0844355685058127 0.0 0.0 2.0844355685058127 + -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -0.0 -0.0 -0.0 + -1.480745232754872 -1.480745232754872 -1.480745232754872 -1.480745232754872 -0.0 -0.0 -0.0 + 0.21539031393949493 0.21539031393949493 0.21539031393949493 0.0 0.21539031393949493 0.0 0.21539031393949493 + 1.6360787089859707 1.6360787089859707 1.6360787089859707 0.0 1.6360787089859707 0.0 1.6360787089859707 + 2.7952193074887086 2.7952193074887086 2.7952193074887086 0.0 2.7952193074887086 0.0 2.7952193074887086 + -1.448364418208364 -1.448364418208364 -1.448364418208364 -0.0 -1.448364418208364 -0.0 -0.0 + -0.9833503482488964 -0.9833503482488964 -0.9833503482488964 -0.0 -0.9833503482488964 -0.0 -0.0 + 
-1.5017276161539084 -1.5017276161539084 -1.5017276161539084 -0.0 -1.5017276161539084 -0.0 -0.0 + -1.7640356839137032 -1.7640356839137032 -1.7640356839137032 -0.0 -1.7640356839137032 -0.0 -0.0 + -1.5776069676233444 -1.5776069676233444 -1.5776069676233444 -0.0 -1.5776069676233444 -0.0 -0.0 + 0.06361165131312438 0.06361165131312438 0.06361165131312438 0.0 0.06361165131312438 0.0 0.0 + 2.8475608847598153 2.8475608847598153 2.8475608847598153 0.0 2.8475608847598153 0.0 0.0 + -0.8892460264142052 -0.8892460264142052 -0.8892460264142052 -0.0 -0.0 -0.8892460264142052 -0.0 + 1.7743695974457907 1.7743695974457907 1.7743695974457907 0.0 0.0 1.7743695974457907 0.0 + -1.4305200814192562 -1.4305200814192562 -1.4305200814192562 -0.0 -0.0 -1.4305200814192562 -0.0 + -0.9478929479399423 -0.9478929479399423 -0.9478929479399423 -0.0 -0.0 -0.9478929479399423 -0.0 + 1.2024302930353608 1.2024302930353608 1.2024302930353608 0.0 0.0 1.2024302930353608 0.0 + 4.02280289664674 4.02280289664674 4.02280289664674 0.0 0.0 4.02280289664674 0.0 + 10.440933185941839 10.440933185941839 10.440933185941839 0.0 0.0 10.440933185941839 0.0 + 1.262517093518885 1.262517093518885 0.0 0.0 0.0 0.0 1.262517093518885 + -0.9176184029771589 -0.9176184029771589 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.6982138187318754 -0.6982138187318754 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.7133696015602422 1.7133696015602422 0.0 0.0 0.0 0.0 0.0 + 5.976953806399672 5.976953806399672 0.0 0.0 0.0 0.0 0.0 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271 + -1.1194589330024307 -1.1194589330024307 -0.0 -1.1194589330024307 -0.0 -0.0 -1.1194589330024307 + -1.6605118926433484 -1.6605118926433484 -0.0 -1.6605118926433484 -0.0 -0.0 -1.6605118926433484 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 
-0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271 + -0.016084003453250676 -0.016084003453250676 -0.0 -0.016084003453250676 -0.0 -0.0 -0.016084003453250676 + 1.4107278812149031 1.4107278812149031 0.0 1.4107278812149031 0.0 0.0 1.4107278812149031 + -1.1128985115655265 -1.1128985115655265 -0.0 -1.1128985115655265 -0.0 -0.0 -0.0 + 3.7957001151581404 3.7957001151581404 0.0 3.7957001151581404 0.0 0.0 0.0 + -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0 + -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0 + -0.2475403067755282 -0.2475403067755282 -0.0 -0.2475403067755282 -0.0 -0.0 -0.0 + 14.054845699928913 14.054845699928913 0.0 14.054845699928913 0.0 0.0 0.0 + -0.8850373634971601 -0.8850373634971601 -0.0 -0.0 -0.8850373634971601 -0.0 -0.8850373634971601 + -1.7594068536637126 -1.7594068536637126 -0.0 -0.0 -1.7594068536637126 -0.0 -1.7594068536637126 + -0.9681259531090506 -0.9681259531090506 -0.0 -0.0 -0.9681259531090506 -0.0 -0.9681259531090506 + -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888 + -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888 + -1.7082890535667876 -1.7082890535667876 -0.0 -0.0 -1.7082890535667876 -0.0 -1.7082890535667876 + -1.6168827210404924 -1.6168827210404924 -0.0 -0.0 -1.6168827210404924 -0.0 -1.6168827210404924 + -1.4399676449006795 -1.4399676449006795 -0.0 -0.0 -1.4399676449006795 -0.0 -1.4399676449006795 + -1.2202487676722908 -1.2202487676722908 -0.0 -0.0 -1.2202487676722908 -0.0 -1.2202487676722908 + -1.5079358693315765 -1.5079358693315765 -0.0 -0.0 -1.5079358693315765 -0.0 -0.0 + -1.3842064467607202 -1.3842064467607202 -0.0 -0.0 
-0.0 -1.3842064467607202 -0.0 + -1.5208922216041325 -1.5208922216041325 -0.0 -0.0 -0.0 -1.5208922216041325 -0.0 + 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 + -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0 + -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0 + -1.76269912849327 -1.76269912849327 -0.0 -0.0 -0.0 -1.76269912849327 -0.0 + -0.7863622513628796 -0.7863622513628796 -0.0 -0.0 -0.0 -0.7863622513628796 -0.0 + -0.32795262618891574 -0.32795262618891574 -0.0 -0.0 -0.0 -0.32795262618891574 -0.0 + 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 + 3.2758115948278728 3.2758115948278728 0.0 0.0 0.0 3.2758115948278728 0.0] rtol = 1e-04 +end + From 852e30725d8f7e35130173d5f92c2a12c6868014 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 25 Oct 2022 10:52:06 +0200 Subject: [PATCH 065/106] Delete scratch.jl --- src/scratch.jl | 69 -------------------------------------------------- 1 file changed, 69 deletions(-) delete mode 100644 src/scratch.jl diff --git a/src/scratch.jl b/src/scratch.jl deleted file mode 100644 index 051f723e..00000000 --- a/src/scratch.jl +++ /dev/null @@ -1,69 +0,0 @@ -using Revise -using GLM -using DataFrames -using Random -using CSV -using StatsBase -using RDatasets -Random.seed!(11) - -y = rand(10) -x = rand(10,2) -wts = rand(10) -df = DataFrame(x, :auto) -df.y = y -df.wts = wts -lm1 = lm(x,y) -lmw = lm(x,y; wts = wts) -lmf = lm(@formula(y~x1+x2-1), df) -lmfw = lm(@formula(y~-1+x1+x2), df; wts = iweights(wts)) -lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) -lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) - -glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) - -cooksdistance(lm1) - - - -df = dataset("quantreg", "engel") -N = nrow(df) -df.weights = repeat(1:5, Int(N/5)) -f = @formula(FoodExp ~ Income) -lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) -glm_model = glm(f, df, 
Normal(), wts = FrequencyWeights(df.weights)) -@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) -@test isapprox(r2(lm_model), 0.8330258148644486) -@test isapprox(adjr2(lm_model), 0.832788298242634) -@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; - -0.06772589439264813 6.670664781664879e-5]) -@test isapprox(first(predict(lm_model)), 357.57694841780994) -@test isapprox(loglikelihood(lm_model), -4353.946729075838) -@test isapprox(loglikelihood(glm_model), -4353.946729075838) -@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) -@test isapprox(mean(residuals(lm_model)), -5.412966629787718) - -lm_model = lm(f, df, wts = df.weights) -glm_model = glm(f, df, Normal(), wts = df.weights) -@test isa(weights(lm_model), FrequencyWeights) -@test isa(weights(glm_model), FrequencyWeights) - - - - -lm_model = lm(f, df, wts = iweights(df.weights)) -glm_model = glm(f, df, Normal(), wts = iweights(df.weights)) -@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) -@test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842]) -@test isapprox(r2(lm_model), 0.8330258148644486) -@test isapprox(adjr2(lm_model), 0.8323091874604334) -@test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907; - -0.20434035947652907 0.00020126512195323495]) -@test isapprox(first(predict(lm_model)), 357.57694841780994) -@test isapprox(loglikelihood(lm_model), -4353.946729075838) -@test isapprox(loglikelihood(glm_model), -4353.946729075838) -@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) -@test isapprox(mean(residuals(lm_model)), -5.412966629787718) \ No newline at end of file From d1ba3e5b984d761b05eedcf579df65701ec4cf93 Mon Sep 17 00:00:00 2001 From: Giuseppe 
Ragusa Date: Tue, 25 Oct 2022 10:52:47 +0200 Subject: [PATCH 066/106] Delete analytic_weights.jl --- analytic_weights.jl | 170 -------------------------------------------- 1 file changed, 170 deletions(-) delete mode 100644 analytic_weights.jl diff --git a/analytic_weights.jl b/analytic_weights.jl deleted file mode 100644 index bf59c949..00000000 --- a/analytic_weights.jl +++ /dev/null @@ -1,170 +0,0 @@ -rng=StableRNG(123) - -x1 = rand(rng, 15) -x2 = ifelse.(randn(rng, 15).>0,1,0) - -y = ifelse.(0.004 .- 0.01.*x1 .+ 1.5.*x2 .+ randn(rng, 15).>0, 1, 0) -w = rand(rng, 15)*6 -w = floor.(w) .+ 1 - - -df = DataFrame(y=y,x1=x1,x2=x2, w=w) - -clotting = DataFrame(u = log.([5,10,15,20,30,40,60,80,100]), - lot1 = [118,58,42,35,27,25,21,19,18], - w = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) - -quine.aweights = log.(3 .+ 3 .*quine.Days) -quine.pweights = 1.0./(quine.aweights./sum(quine.aweights)) -quine.fweights = floor.(quine.aweights) - -dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], - Outcome = categorical(repeat(string.('A':'C'), outer = 3)), - Treatment = categorical(repeat(string.('a':'c'), inner = 3)), - w = [1,2,1,2,3,4,3,2,1] - ) - - -@testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=df) - @test deviance(model) ≈ 40.80540879298288 - @test loglikelihood(model) ≈ -20.402704396491437 - @test coef(model) ≈ - @test stderror(model) ≈ [1.3764416117357274, 2.1047035048697387, 2891.0832705575244] - @test aic(model) ≈ 46.805408792982874 - @test bic(model) ≈ 48.929559396289505 - @test momentmatrix(model) ≈ [4.975291538563986e-9 9.006548481517277e-10 4.975291538563986e-9; 7.457508769849823e-9 2.7405594965454296e-9 7.457508769849823e-9; 2.285271769575856e-8 1.528980344883474e-8 2.285271769575856e-8; 2.2327974533715886e-9 9.540869852611076e-11 2.2327974533715886e-9; -3.9002435408230824 -1.7082714161861667 -0.0; 1.9456115337317996 0.9402948505761761 0.0; 3.250941922802209e-8 
2.4836342240320546e-8 3.250941922802209e-8; 0.7662630435773894 0.7083972308412865 0.0; 2.0874526025016062 1.0751322798196368 0.0; 1.995938694540086e-8 9.880729193072285e-9 1.995938694540086e-8; 2.354909005285928 1.651591865013276 0.0; -1.2548308790461924 -0.8703121995275815 -0.0; -1.3365591258889113 -1.1711904042878245 -0.0; -0.6626026393385315 -0.6256422062487993 -0.0; 2.9653540079963233e-8 2.192027023041504e-8 2.9653540079963233e-8] -end - -@testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=df) - @test deviance(model) ≈ 40.80540879298288 - @test loglikelihood(model) ≈ -20.402704396491437 - @test coef(model) ≈ - @test stderror(model) ≈ [1.3764416117357274, 2.1047035048697387, 2891.0832705575244] - @test aic(model) ≈ 46.805408792982874 - @test bic(model) ≈ 48.929559396289505 - @test momentmatrix(model) ≈ [4.975291538563986e-9 9.006548481517277e-10 4.975291538563986e-9; 7.457508769849823e-9 2.7405594965454296e-9 7.457508769849823e-9; 2.285271769575856e-8 1.528980344883474e-8 2.285271769575856e-8; 2.2327974533715886e-9 9.540869852611076e-11 2.2327974533715886e-9; -3.9002435408230824 -1.7082714161861667 -0.0; 1.9456115337317996 0.9402948505761761 0.0; 3.250941922802209e-8 2.4836342240320546e-8 3.250941922802209e-8; 0.7662630435773894 0.7083972308412865 0.0; 2.0874526025016062 1.0751322798196368 0.0; 1.995938694540086e-8 9.880729193072285e-9 1.995938694540086e-8; 2.354909005285928 1.651591865013276 0.0; -1.2548308790461924 -0.8703121995275815 -0.0; -1.3365591258889113 -1.1711904042878245 -0.0; -0.6626026393385315 -0.6256422062487993 -0.0; 2.9653540079963233e-8 2.192027023041504e-8 2.9653540079963233e-8] -end - -@testset "GLM: Binomial with ProbitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), wts=df) - @test deviance(model) ≈ 40.78137404276874 - @test loglikelihood(model) ≈ -20.39068702138437 - @test coef(model) ≈ - @test 
stderror(model) ≈ [0.8364009151837031, 1.2669759328012313, 383.275056480285] - @test aic(model) ≈ 46.78137404276874 - @test bic(model) ≈ 48.90552464607537 - @test momentmatrix(model) ≈ [2.141319874423096e-10 3.876335911964382e-11 2.141319874423096e-10; 2.660620461776351e-9 9.777512703308697e-10 2.660620461776351e-9; 1.6837104388797515e-7 1.1265006647327841e-7 1.6837104388797515e-7; 1.775927884351608e-11 7.588640333961262e-13 1.775927884351608e-11; -6.32108519917438 -2.768578180312368 -0.0; 3.160081293711658 1.5272360984516953 0.0; 5.617843709811802e-7 4.291885008799245e-7 5.617843709811802e-7; 1.3247656634751994 1.2247234619826348 0.0; 3.3733372105472994 1.737420874337357 0.0; 2.719173690921834e-8 1.3461044140494708e-8 2.719173690921834e-8; 3.7980026833505782 2.6636911749202743 0.0; -2.0047842518828927 -1.390456850377069 -0.0; -2.2151750132435413 -1.9410976058418117 -0.0; -1.11514238917241 -1.0529389761258265 -0.0; 4.12161049030174e-7 3.046746374574499e-7 4.12161049030174e-7] -end - -@testset "GLM: Binomial with CauchitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=df) - @test deviance(model) ≈ 40.8975259939964 - @test loglikelihood(model) ≈ -20.4487629969982 - @test coef(model) ≈ - @test stderror(model) ≈ [1.3251544363304, 2.0816819672326212, 3.9763720496138e10] - @test aic(model) ≈ 46.8975259939964 - @test bic(model) ≈ 49.021676597303035 - @test momentmatrix(model) ≈ [3.292881549556344e-15 5.960956677632797e-16 3.292881549556344e-15; 2.469661355441319e-15 9.075757186192704e-16 2.469661355441319e-15; 2.469661724388082e-15 1.6523479987683206e-15 2.469661724388082e-15; 2.469660974846597e-15 1.055300108189554e-16 2.469660974846597e-15; -4.288393332164326 -1.8782775162685588 -0.0; 2.090455393992148 1.010296458594581 0.0; 2.469661833186925e-15 1.8867567604535068e-15 2.469661833186925e-15; 0.6503620950105018 0.6012487630862063 0.0; 2.3400182402064034 1.205214979443208 0.0; 4.116102547017224e-15 
2.0376424741524278e-15 4.116102547017224e-15; 2.8310688979703755 1.9855418407609555 0.0; -1.610846692068502 -1.117233844883936 -0.0; -1.3776439416401394 -1.2071919107215705 -0.0; -0.6350206613065066 -0.5995987700109224 -0.0; 2.469661799647931e-15 1.8256051007749665e-15 2.469661799647931e-15] -end - -@testset "GLM: Binomial with CloglogLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts=df) - @test deviance(model) ≈ 41.205557080153405 - @test loglikelihood(model) ≈ -20.602778540076702 - @test coef(model) ≈ - @test stderror(model) ≈ [0.973218567746775, 1.5788168207479476, 134.2308296148033] - @test aic(model) ≈ 47.205557080153405 - @test bic(model) ≈ 49.329707683460036 - @test momentmatrix(model) ≈ [8.881784197001252e-16 1.6078297995730002e-16 8.881784197001252e-16; 6.661338147750942e-16 2.447974797472054e-16 6.661338147750942e-16; 3.3706748473230575e-8 2.2551784252367408e-8 3.3706748473230575e-8; 6.661338147750942e-16 2.8464274811830893e-17 6.661338147750942e-16; -6.002070351507831 -2.62885256064177 -0.0; 3.090828500347432 1.4937669069612967 0.0; 2.193326882864893e-6 1.6756441179599079e-6 2.193326882864893e-6; 0.8552543543316015 0.7906681933203132 0.0; 3.211027051982352 1.653823818956702 0.0; 1.0692624807746624e-12 5.293295349074617e-13 1.0692624807746624e-12; 3.040005920969103 2.1320777309844043 0.0; -1.607851182110078 -1.1151562510789412 -0.0; -1.7153341048186206 -1.503100614703514 -0.0; -0.8718601911242182 -0.8232272271960682 -0.0; 8.127843792739411e-7 6.008204479027258e-7 8.127843792739411e-7] -end - -@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=clotting) - @test deviance(model) ≈ 0.03933389380881642 - @test loglikelihood(model) ≈ -43.359078787690514 - @test coef(model) ≈ - @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] - @test aic(model) ≈ 92.71815757538103 - @test bic(model) ≈ 
93.30983130738969 - @test momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; -1643.317155973023 -3783.877586404854; -420.13783432322964 -1137.7543467296691; -981.2887166533023 -2939.6782781526754; 313.30087123532877 1065.5981029180723; -186.60227446859759 -688.353296378139; 324.34628373045786 1327.9854430687467; 430.8197010892654 1887.863404915401; 262.77277766267576 1210.113361381432] -end - -@testset "GLM: Gamma with IdentityLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), wts=clotting) - @test deviance(model) ≈ 1.3435348802929383 - @test loglikelihood(model) ≈ -101.19916126647321 - @test coef(model) ≈ - @test stderror(model) ≈ [16.07962739541372, 3.766841480457265] - @test aic(model) ≈ 208.39832253294642 - @test bic(model) ≈ 208.9899962649551 - @test momentmatrix(model) ≈ [0.26061914480947884 0.4194503323625281; 0.06148544891860896 0.14157547811603585; -0.019061929106842457 -0.051620660951180786; -0.1795782998461795 -0.5379685084791557; -0.1764962075232437 -0.6002984389013568; -0.2277661940139623 -0.8402020334398342; -0.3204523427685144 -1.3120423070655995; -0.054878647210950426 -0.2404796937532563; 0.6561290267416002 3.0215858321118008] -end - -@testset "GLM: Gamma with LogLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), wts=clotting) - @test deviance(model) ≈ 0.41206342934199663 - @test loglikelihood(model) ≈ -81.79777246247532 - @test coef(model) ≈ - @test stderror(model) ≈ [0.20287310816341905, 0.053062600599660774] - @test aic(model) ≈ 169.59554492495064 - @test bic(model) ≈ 170.18721865695932 - @test momentmatrix(model) ≈ [14.39716447431257 23.171342336508012; 0.0374983950207553 0.0863432453859933; -2.5490869750808054 -6.903055495494598; -12.821435846444906 -38.40958915849704; -8.713283462827741 -29.635596899449876; -6.520303896525519 -24.05261507847203; -4.123729229896082 -16.88396834850135; 3.70269025008355 16.225287295813413; 
16.590486289982852 76.40201283367323] -end - -@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=clotting) - @test deviance(model) ≈ 0.03933389380881642 - @test loglikelihood(model) ≈ -43.359078787690514 - @test coef(model) ≈ - @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] - @test aic(model) ≈ 92.71815757538103 - @test bic(model) ≈ 93.30983130738969 - @test momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; -1643.317155973023 -3783.877586404854; -420.13783432322964 -1137.7543467296691; -981.2887166533023 -2939.6782781526754; 313.30087123532877 1065.5981029180723; -186.60227446859759 -688.353296378139; 324.34628373045786 1327.9854430687467; 430.8197010892654 1887.863404915401; 262.77277766267576 1210.113361381432] -end - -@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), wts=clotting) - @test deviance(model) ≈ 0.021377370485120707 - @test loglikelihood(model) ≈ -86.82546665077861 - @test coef(model) ≈ - @test stderror(model) ≈ [0.00016779409928094252, 9.025235597677238e-5] - @test aic(model) ≈ 179.65093330155722 - @test bic(model) ≈ 180.2426070335659 - @test momentmatrix(model) ≈ [28815.030725087538 46376.00289690935; -21039.070620903 -48444.250382140235; -6195.618377983015 -16778.045594449453; -15686.073415243622 -46991.276375382586; -1716.0787284468345 -5836.722477919495; -2086.203482054124 -7695.75316205041; 3418.087237993986 13994.826896081435; 6065.271775021221 26578.18246467872; 8424.676595366931 38797.069483575455] -end - -@testset "GLM: with LogLink link - AnalyticWeights" begin - model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, (), LogLink(), wts=quine) - @test deviance(model) ≈ 624.7631999565588 - @test loglikelihood(model) ≈ -2004.5939464322778 - @test coef(model) ≈ - @test stderror(model) ≈ 
[0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] - @test aic(model) ≈ 4023.1878928645556 - @test bic(model) ≈ 4044.073139216514 - @test momentmatrix(model) ≈ [-3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063; -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667; -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0; -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0; -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045; -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0; 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0; -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331; -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847; 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412; 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628; -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0; 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0; 
6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0; 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0; -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0; 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0; 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0; 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0; -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677; -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0; 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0; -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485; -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047; 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347; 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183; 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826; 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137; 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0; 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0; 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 
0.0 0.0; -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554; -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471; -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402; -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927; 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548; 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142; 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032; 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504; -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0; -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0; -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0; -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0; -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0; -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0; -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0; -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0; 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0; 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0; -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597; -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244; 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0; -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0; -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0; -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0; 
-1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217; 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873; -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0; -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0; 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936; 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854; 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725; -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0; -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0; -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0; -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0; -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0; 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0; 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0; -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 
-1.9961184031161041 -0.0; 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0; -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0; -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0; 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0; 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0; 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0; 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876; -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0; -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0; 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0; 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116; -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487; 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307; -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0; 4.548120970699322 
4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0; 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0; -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601; -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536; -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715; -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027; -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088; -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149; -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0; -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0; -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0; -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0; -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 
1.4593378269316923 0.0; 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0] -end - -@testset "GLM: with LogLink link - AnalyticWeights" begin - model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, (), LogLink(), wts=quine) - @test deviance(model) ≈ 624.7631999565588 - @test loglikelihood(model) ≈ -2004.5939464322778 - @test coef(model) ≈ - @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] - @test aic(model) ≈ 4023.1878928645556 - @test bic(model) ≈ 4044.073139216514 - @test momentmatrix(model) ≈ [-3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063; -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667; -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0; -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0; -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0; -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047; -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045; -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0; 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0; -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331; -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847; 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412; 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628; 
-2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0; -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0; 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0; 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0; 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0; -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0; -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0; 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0; 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0; 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0; -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677; -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0; 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0; -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546; -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485; -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047; 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347; 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183; 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826; 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 
17.72394862307137; 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0; -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0; 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0; 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0; -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554; -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471; -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402; -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927; 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548; 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142; 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032; 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504; -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0; -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0; -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0; -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0; -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0; -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0; -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0; -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0; 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0; 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0; -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597; -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244; 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341; -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; 
-1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0; -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0; -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0; -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0; -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847; -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217; 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873; -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0; -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0; 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936; 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854; 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725; -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0; -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0; -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0; -3.4432739212266052 -3.4432739212266052 
-3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0; -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0; 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0; 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0; -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0; 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0; -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0; -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0; 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0; 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0; 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0; 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876; -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0; -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0; 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0; 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116; -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501; -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 
-2.750339462985501; -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617; -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487; 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307; -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0; 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0; -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0; 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0; -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601; -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536; -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918; -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715; -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027; -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088; -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149; -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0; -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0; -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 
-3.7560337640183694 -0.0; -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0; -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0; -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0; -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0; 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0; 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0] -end - -@testset "GLM: with SqrtLink link - AnalyticWeights" begin - model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, (), SqrtLink(), wts=quine) - @test deviance(model) ≈ 626.6464732988984 - @test loglikelihood(model) ≈ -2005.5355831034462 - @test coef(model) ≈ - @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, 0.3235019638755972] - @test aic(model) ≈ 4025.0711662068925 - @test bic(model) ≈ 4045.956412558851 - @test momentmatrix(model) ≈ [-1.4294351675636041 -0.0 -1.4294351675636041 -0.0 -0.0 -0.0 -1.4294351675636041; -1.5410055711037194 -0.0 -1.5410055711037194 -0.0 -0.0 -0.0 -1.5410055711037194; -1.3571249039047424 -0.0 -1.3571249039047424 -0.0 -0.0 -0.0 -1.3571249039047424; -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0; -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0; -1.229734152157926 -0.0 -1.229734152157926 -0.0 -0.0 -0.0 -0.0; -0.3742348640443611 -0.0 -0.3742348640443611 -0.0 -0.0 -0.0 -0.0; -0.09370480172054219 -0.0 -0.09370480172054219 -0.0 -0.0 -0.0 -0.0; -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827; -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827; -0.6748210571645206 -0.0 -0.6748210571645206 -0.6748210571645206 -0.0 -0.0 -0.6748210571645206; -1.5016227445218024 -0.0 -1.5016227445218024 -1.5016227445218024 -0.0 -0.0 -0.0; 
-0.058778966482651636 -0.0 -0.058778966482651636 -0.058778966482651636 -0.0 -0.0 -0.0; -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288; 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255; 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344; 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942; -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -0.0; -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0; -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0; -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 -0.0; 2.212861910664014 0.0 2.212861910664014 0.0 2.212861910664014 0.0 0.0; 2.6862849076558937 0.0 2.6862849076558937 0.0 2.6862849076558937 0.0 0.0; 3.1694781034873523 0.0 3.1694781034873523 0.0 3.1694781034873523 0.0 0.0; -1.6534192741665588 -0.0 -1.6534192741665588 -0.0 -0.0 -1.6534192741665588 -0.0; -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0; -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0; -0.23123674216762394 -0.0 -0.23123674216762394 -0.0 -0.0 -0.23123674216762394 -0.0; 0.3871584524726257 0.0 0.3871584524726257 0.0 0.0 0.3871584524726257 0.0; 0.6036586921589513 0.0 0.6036586921589513 0.0 0.0 0.6036586921589513 0.0; 0.8246522973739006 0.0 0.8246522973739006 0.0 0.0 0.8246522973739006 0.0; -1.560441651521342 -0.0 -0.0 -0.0 -0.0 -0.0 -1.560441651521342; -1.7419685003857353 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; -1.4153955925789807 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0; 0.23770439864218734 0.0 0.0 0.0 0.0 0.0 0.0; 3.853247675936175 0.0 0.0 0.0 0.0 0.0 0.0; -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 -0.0 -0.0 -1.7692672149493731; -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218; -1.7282440188407218 -0.0 -0.0 
-1.7282440188407218 -0.0 -0.0 -1.7282440188407218; -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 -0.0 -0.0 -1.4769837741682987; -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 -0.0 -0.0 -0.9582774689727417; 0.8052632685284861 0.0 0.0 0.8052632685284861 0.0 0.0 0.8052632685284861; 1.2077994352773953 0.0 0.0 1.2077994352773953 0.0 0.0 1.2077994352773953; 2.7042310768987665 0.0 0.0 2.7042310768987665 0.0 0.0 2.7042310768987665; 7.744950633464035 0.0 0.0 7.744950633464035 0.0 0.0 7.744950633464035; 7.999933021719232 0.0 0.0 7.999933021719232 0.0 0.0 7.999933021719232; -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0; -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0; -0.7262212443523531 -0.0 -0.0 -0.7262212443523531 -0.0 -0.0 -0.0; 0.7835691668207807 0.0 0.0 0.7835691668207807 0.0 0.0 0.0; 1.3489349925379315 0.0 0.0 1.3489349925379315 0.0 0.0 0.0; -1.6522100272913283 -0.0 -0.0 -0.0 -1.6522100272913283 -0.0 -1.6522100272913283; -1.4590418232851277 -0.0 -0.0 -0.0 -1.4590418232851277 -0.0 -1.4590418232851277; -1.4015111702500997 -0.0 -0.0 -0.0 -1.4015111702500997 -0.0 -1.4015111702500997; -0.9738202253475602 -0.0 -0.0 -0.0 -0.9738202253475602 -0.0 -0.9738202253475602; 1.8091899079230156 0.0 0.0 0.0 1.8091899079230156 0.0 1.8091899079230156; 1.9274245415701026 0.0 0.0 0.0 1.9274245415701026 0.0 1.9274245415701026; 3.399094699981504 0.0 0.0 0.0 3.399094699981504 0.0 3.399094699981504; 6.157344170373497 0.0 0.0 0.0 6.157344170373497 0.0 6.157344170373497; -1.5082203700488148 -0.0 -0.0 -0.0 -1.5082203700488148 -0.0 -0.0; -0.7518968254083281 -0.0 -0.0 -0.0 -0.0 -0.7518968254083281 -0.0; -1.403623374340758 -0.0 -0.0 -0.0 -0.0 -1.403623374340758 -0.0; -1.5307566638052945 -0.0 -0.0 -0.0 -0.0 -1.5307566638052945 -0.0; -1.6487615285777935 -0.0 -0.0 -0.0 -0.0 -1.6487615285777935 -0.0; -1.5960112869101046 -0.0 -0.0 -0.0 -0.0 -1.5960112869101046 -0.0; -1.3904968459197917 -0.0 -0.0 -0.0 -0.0 -1.3904968459197917 -0.0; -0.8618818687491527 -0.0 -0.0 
-0.0 -0.0 -0.8618818687491527 -0.0; 0.6414580291618693 0.0 0.0 0.0 0.0 0.6414580291618693 0.0; 1.0942097094869556 0.0 0.0 0.0 0.0 1.0942097094869556 0.0; -1.7282583231217719 -1.7282583231217719 -1.7282583231217719 -0.0 -0.0 -0.0 -1.7282583231217719; -0.31744768418403196 -0.31744768418403196 -0.31744768418403196 -0.0 -0.0 -0.0 -0.31744768418403196; 11.375280355235768 11.375280355235768 11.375280355235768 0.0 0.0 0.0 11.375280355235768; -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0; -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0; -1.7598032161223125 -1.7598032161223125 -1.7598032161223125 -0.0 -0.0 -0.0 -0.0; -1.491030768800334 -1.491030768800334 -1.491030768800334 -0.0 -0.0 -0.0 -0.0; -0.7301769140610584 -0.7301769140610584 -0.7301769140610584 -0.0 -0.0 -0.0 -0.0; -0.5034045168716083 -0.5034045168716083 -0.5034045168716083 -0.0 -0.0 -0.0 -0.0; -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734; -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734; -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545; -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545; -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545; -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.0 -0.0 -0.07026189294137537; 2.0844355685058127 2.0844355685058127 2.0844355685058127 2.0844355685058127 0.0 0.0 2.0844355685058127; -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -0.0 -0.0 -0.0; -1.480745232754872 -1.480745232754872 -1.480745232754872 -1.480745232754872 -0.0 -0.0 -0.0; 0.21539031393949493 0.21539031393949493 0.21539031393949493 0.0 0.21539031393949493 0.0 0.21539031393949493; 
1.6360787089859707 1.6360787089859707 1.6360787089859707 0.0 1.6360787089859707 0.0 1.6360787089859707; 2.7952193074887086 2.7952193074887086 2.7952193074887086 0.0 2.7952193074887086 0.0 2.7952193074887086; -1.448364418208364 -1.448364418208364 -1.448364418208364 -0.0 -1.448364418208364 -0.0 -0.0; -0.9833503482488964 -0.9833503482488964 -0.9833503482488964 -0.0 -0.9833503482488964 -0.0 -0.0; -1.5017276161539084 -1.5017276161539084 -1.5017276161539084 -0.0 -1.5017276161539084 -0.0 -0.0; -1.7640356839137032 -1.7640356839137032 -1.7640356839137032 -0.0 -1.7640356839137032 -0.0 -0.0; -1.5776069676233444 -1.5776069676233444 -1.5776069676233444 -0.0 -1.5776069676233444 -0.0 -0.0; 0.06361165131312438 0.06361165131312438 0.06361165131312438 0.0 0.06361165131312438 0.0 0.0; 2.8475608847598153 2.8475608847598153 2.8475608847598153 0.0 2.8475608847598153 0.0 0.0; -0.8892460264142052 -0.8892460264142052 -0.8892460264142052 -0.0 -0.0 -0.8892460264142052 -0.0; 1.7743695974457907 1.7743695974457907 1.7743695974457907 0.0 0.0 1.7743695974457907 0.0; -1.4305200814192562 -1.4305200814192562 -1.4305200814192562 -0.0 -0.0 -1.4305200814192562 -0.0; -0.9478929479399423 -0.9478929479399423 -0.9478929479399423 -0.0 -0.0 -0.9478929479399423 -0.0; 1.2024302930353608 1.2024302930353608 1.2024302930353608 0.0 0.0 1.2024302930353608 0.0; 4.02280289664674 4.02280289664674 4.02280289664674 0.0 0.0 4.02280289664674 0.0; 10.440933185941839 10.440933185941839 10.440933185941839 0.0 0.0 10.440933185941839 0.0; 1.262517093518885 1.262517093518885 0.0 0.0 0.0 0.0 1.262517093518885; -0.9176184029771589 -0.9176184029771589 -0.0 -0.0 -0.0 -0.0 -0.0; -0.6982138187318754 -0.6982138187318754 -0.0 -0.0 -0.0 -0.0 -0.0; 1.7133696015602422 1.7133696015602422 0.0 0.0 0.0 0.0 0.0; 5.976953806399672 5.976953806399672 0.0 0.0 0.0 0.0 0.0; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 
-1.1866621929181271; -1.1194589330024307 -1.1194589330024307 -0.0 -1.1194589330024307 -0.0 -0.0 -1.1194589330024307; -1.6605118926433484 -1.6605118926433484 -0.0 -1.6605118926433484 -0.0 -0.0 -1.6605118926433484; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735; -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271; -0.016084003453250676 -0.016084003453250676 -0.0 -0.016084003453250676 -0.0 -0.0 -0.016084003453250676; 1.4107278812149031 1.4107278812149031 0.0 1.4107278812149031 0.0 0.0 1.4107278812149031; -1.1128985115655265 -1.1128985115655265 -0.0 -1.1128985115655265 -0.0 -0.0 -0.0; 3.7957001151581404 3.7957001151581404 0.0 3.7957001151581404 0.0 0.0 0.0; -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0; -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0; -0.2475403067755282 -0.2475403067755282 -0.0 -0.2475403067755282 -0.0 -0.0 -0.0; 14.054845699928913 14.054845699928913 0.0 14.054845699928913 0.0 0.0 0.0; -0.8850373634971601 -0.8850373634971601 -0.0 -0.0 -0.8850373634971601 -0.0 -0.8850373634971601; -1.7594068536637126 -1.7594068536637126 -0.0 -0.0 -1.7594068536637126 -0.0 -1.7594068536637126; -0.9681259531090506 -0.9681259531090506 -0.0 -0.0 -0.9681259531090506 -0.0 -0.9681259531090506; -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888; -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888; -1.7082890535667876 -1.7082890535667876 -0.0 -0.0 -1.7082890535667876 -0.0 -1.7082890535667876; -1.6168827210404924 -1.6168827210404924 -0.0 -0.0 
-1.6168827210404924 -0.0 -1.6168827210404924; -1.4399676449006795 -1.4399676449006795 -0.0 -0.0 -1.4399676449006795 -0.0 -1.4399676449006795; -1.2202487676722908 -1.2202487676722908 -0.0 -0.0 -1.2202487676722908 -0.0 -1.2202487676722908; -1.5079358693315765 -1.5079358693315765 -0.0 -0.0 -1.5079358693315765 -0.0 -0.0; -1.3842064467607202 -1.3842064467607202 -0.0 -0.0 -0.0 -1.3842064467607202 -0.0; -1.5208922216041325 -1.5208922216041325 -0.0 -0.0 -0.0 -1.5208922216041325 -0.0; 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0; -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0; -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0; -1.76269912849327 -1.76269912849327 -0.0 -0.0 -0.0 -1.76269912849327 -0.0; -0.7863622513628796 -0.7863622513628796 -0.0 -0.0 -0.0 -0.7863622513628796 -0.0; -0.32795262618891574 -0.32795262618891574 -0.0 -0.0 -0.0 -0.32795262618891574 -0.0; 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0; 3.2758115948278728 3.2758115948278728 0.0 0.0 0.0 3.2758115948278728 0.0] -end - From 831f28041a12b3ac359b9fca4ef3e24975d60e70 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 15 Nov 2022 16:58:04 +0100 Subject: [PATCH 067/106] Follow reviewer suggestions [Batch 1] --- src/glmfit.jl | 6 +-- src/linpred.jl | 20 +++++++- src/lm.jl | 32 +++--------- test/runtests.jl | 124 ++++++++++++++++------------------------------- 4 files changed, 69 insertions(+), 113 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 195b72f1..6bb07b86 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -774,12 +774,10 @@ function residuals(r::GlmResp; weighted::Bool=false) return dres end - - -## To be removed once is merged +## To be removed once StasAPI PR# is merged momentmatrix(m::RegressionModel) = momentmatrix(m.model) -function momentmatrix(m::GeneralizedLinearModel) +function momentmatrix(m::GeneralizedLinearModel; weighted::Bool = isweighted(m)) X = 
modelmatrix(m; weighted=false) r = m.rr.wrkwt .* m.rr.wrkresid d = varstruct(m.rr, r) diff --git a/src/linpred.jl b/src/linpred.jl index 6dc009b2..fdae6684 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -279,7 +279,7 @@ function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, B = Zv'Zv Av = view(A, nnancols, nnancols) V = similar(pp.scratchm2) - V[nnancols, nnancols] .= Av*B*Av + V[nnancols, nnancols] = Av*B*Av V[nancols, :] .= NaN V[:, nancols] .= NaN else @@ -324,6 +324,24 @@ function modelmatrix(pp::LinPred; weighted::Bool=isweighted(obj)) return Z end +hatvalues(x::LinPredModel) = hatvalues(x.pp) + +function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} + X = modelmatrix(pp; weighted=isweighted(pp)) + _, k = size(X) + ch = pp.chol + rnk = rank(ch) + p = ch.p + idx = invperm(p)[1:rnk] + sum(x -> x^2, view(X, :, 1:rnk)/ch.U[1:rnk, idx], dims=2) +end + +function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} + X = modelmatrix(pp; weighted=isweighted(pp)) + sum(x -> x^2, X/pp.chol.U, dims=2) +end + + response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu diff --git a/src/lm.jl b/src/lm.jl index 4dd36e0a..37cca998 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -46,7 +46,7 @@ updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec function deviance(r::LmResp{T,<:AbstractWeights}) where T y = r.y mu = r.mu - wts = r.wts + wts = r.wts if wts isa UnitWeights v = zero(eltype(y)) + zero(eltype(y)) @inbounds @simd for i in eachindex(y,mu,wts) @@ -81,15 +81,12 @@ function loglikelihood(r::LmResp{T,<:ProbabilityWeights}) where T throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) end -function residuals(r::LmResp; weighted=false) +function residuals(r::LmResp; weighted::Bool=false) wts = weights(r) - res = r.y - r.mu - if !weighted - res - elseif r.wts isa AbstractWeights - sqrt.(wts).*res + if weighted && !isa(r.wts, 
UnitWeights) + sqrt.(wts) .* (r.y .- r.mu) else - res + r.y .- r.mu end end @@ -350,28 +347,11 @@ function crossmodelmatrix(model::RegressionModel; weighted::Bool=false) return Symmetric(x' * x) end -hatvalues(x::LinPredModel) = hatvalues(x.pp) - -function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} - X = modelmatrix(pp; weighted=isweighted(pp)) - _, k = size(X) - ch = pp.chol - rnk = rank(ch) - p = ch.p - idx = invperm(p)[1:rnk] - sum((view(X,:,1:rnk)/ch.U[1:rnk, idx]).^2, dims=2) -end - -function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} - X = modelmatrix(pp; weighted=isweighted(pp)) - sum((X/pp.chol.U).^2, dims=2) -end - function StatsBase.cooksdistance(obj::LinearModel) u = residuals(obj; weighted=isweighted(obj)) mse = GLM.dispersion(obj,true) k = dof(obj)-1 hii = hatvalues(obj) - D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) + D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end diff --git a/test/runtests.jl b/test/runtests.jl index a1cc0183..ff7ff476 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -130,17 +130,15 @@ end ## Standard errors from STATA @test stderror(lm_model) ≈ [ 47.22671, .0517617] atol=1e-05 @test stderror(glm_model) ≈ [ 47.22671, .0517617] atol=1e-05 - ## Test the non full rank case df.Income2 = df.Income*2 df.Income3 = df.Income*3 - f = @formula(FoodExp ~ Income3) m1 = lm(f, df, wts = pweights(df.weights)) f = @formula(FoodExp ~ Income + Income2 + Income3) m2 = lm(f, df, wts = pweights(df.weights)) @test stderror(m1) ≈ filter(!isnan, stderror(m2)) - + f = @formula(FoodExp ~ Income3+Income^2) m3 = lm(f, df, wts = pweights(df.weights)) f = @formula(FoodExp ~ Income + Income2 + Income3+Income^2) @@ -894,43 +892,20 @@ end β = randn(rng, 10) y = Bool[rand(rng) < logistic(x) for x in X * β] wts = rand(1000) - gmsparsev = [fit(LinearModel, X, y; wts=fweights(wts)), - fit(LinearModel, X, sparse(y); wts=fweights(wts)), - fit(LinearModel, Matrix(X), sparse(y); wts=fweights(wts))] - gmdense = 
fit(LinearModel, Matrix(X), y; wts=fweights(wts)) - - for gmsparse in gmsparsev - @test isapprox(deviance(gmsparse), deviance(gmdense)) - @test isapprox(coef(gmsparse), coef(gmdense)) - @test isapprox(vcov(gmsparse), vcov(gmdense)) - @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) - end - - gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), - fit(LinearModel, X, sparse(y); wts=aweights(wts)), - fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] - gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) - - for gmsparse in gmsparsev - @test isapprox(deviance(gmsparse), deviance(gmdense)) - @test isapprox(coef(gmsparse), coef(gmdense)) - @test isapprox(vcov(gmsparse), vcov(gmdense)) - @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) - end - - gmsparsev = [fit(LinearModel, X, y; wts=pweights(wts)), - fit(LinearModel, X, sparse(y); wts=pweights(wts)), - fit(LinearModel, Matrix(X), sparse(y); wts=pweights(wts))] - gmdense = fit(LinearModel, Matrix(X), y; wts=pweights(wts)) - - for gmsparse in gmsparsev - @test isapprox(deviance(gmsparse), deviance(gmdense)) - @test isapprox(coef(gmsparse), coef(gmdense)) - @test isapprox(vcov(gmsparse), vcov(gmdense)) - @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + for wfun in (fweights, aweights, pweights) + + gmsparsev = [fit(LinearModel, X, y; wts=wfun(wts)), + fit(LinearModel, X, sparse(y); wts=wfun(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=wfun(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=wfun(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end end - - end @testset "Predict" begin @@ -1567,10 +1542,12 @@ end 
@testset "momentmatrix" begin @testset "Poisson" begin - dobson = DataFrame(Counts = [18.,17,15,20,10,20,25,13,12], - Outcome = categorical(repeat(string.('A':'C'), outer = 3)), - Treatment = categorical(repeat(string.('a':'c'), inner = 3)), - Weights = [0.3, 0.2, .9, .8, .2, .3, .4, .8, .9]) + dobson = DataFrame( + Counts = [18.,17,15,20,10,20,25,13,12], + Outcome = categorical(repeat(string.('A':'C'), outer = 3)), + Treatment = categorical(repeat(string.('a':'c'), inner = 3)), + Weights = [0.3, 0.2, .9, .8, .2, .3, .4, .8, .9] + ) f = @formula(Counts ~ 1 + Outcome + Treatment) @@ -1597,15 +1574,11 @@ end 1.8686815106332157 0.0 0.0 0.0 1.8686815106332157; 0.010149793505874801 0.010149793505874801 0.0 0.0 0.010149793505874801; -1.8788313148033928 -0.0 -1.8788313148033928 -0.0 -1.8788313148033928] - - - @test mm0_pois ≈ GLM.momentmatrix(gm_pois) atol=1e-06 @test mm0_poisw ≈ GLM.momentmatrix(gm_poisw) atol=1e-06 end @testset "Binomial" begin - f = @formula(admit ~ 1 + rank) - + f = @formula(admit ~ 1 + rank) gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial()) mm0_bin = [-0.5 -0.0 -0.0 -0.0 -0.5 -0.5 -0.0 -0.0 @@ -1617,27 +1590,29 @@ end 0.5 0.0 0.0 0.5] @test mm0_bin ≈ GLM.momentmatrix(gm_bin) - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=aweights(admit_agr.count)) - mm0_binw = [-15.1475 -0.0 -0.0 -0.0 - -34.6887 -34.6887 -0.0 -0.0 - -21.5207 -0.0 -21.5207 -0.0 - -9.85075 -0.0 -0.0 -9.85075 - 15.1475 0.0 0.0 0.0 - 34.6887 34.6887 0.0 0.0 - 21.5207 0.0 21.5207 0.0 - 9.85075 0.0 0.0 9.85075] + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), + wts=aweights(admit_agr.count); rtol=1e-08) + mm0_binw = [-15.1475 -0.0 -0.0 -0.0 + -34.6887 -34.6887 -0.0 -0.0 + -21.5207 -0.0 -21.5207 -0.0 + -9.85075 -0.0 -0.0 -9.85075 + 15.1475 0.0 0.0 0.0 + 34.6887 34.6887 0.0 0.0 + 21.5207 0.0 21.5207 0.0 + 9.85075 0.0 0.0 9.85075] @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-03 - Vcov =[ 0.0660173 -0.0660173 -0.0660173 -0.0660173 + 
Vcov = [0.0660173 -0.0660173 -0.0660173 -0.0660173 -0.0660173 0.0948451 0.0660173 0.0660173 -0.0660173 0.0660173 0.112484 0.0660173 -0.0660173 0.0660173 0.0660173 0.167532] - ## This is due to divverences between chol and qr - @test vcov(gm_binw) ≈ Vcov atol=1e-03 + ## This is due to differences between chol and qr + @test vcov(gm_binw) ≈ Vcov atol=1e-06 - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=pweights(admit_agr.count)) - @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-03 + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(); + wts=pweights(admit_agr.count), rtol=1e-08) + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-05 ## This are obtained from stata ## glm admit i.rank [pweight=count], family(binomial) irls coef_stata = [.16430305, -.75002998, -1.364698, -1.6867296] @@ -1654,8 +1629,7 @@ end end @testset "Binomial ProbitLink" begin - f = @formula(admit ~ 1 + rank) - + f = @formula(admit ~ 1 + rank) gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink()) mm0_bin = [-0.7978846 0.0000000 0.0000000 0.0000000 -0.7978846 -0.7978846 0.0000000 0.0000000 @@ -1667,8 +1641,8 @@ end 0.7978846 0.0000000 0.0000000 0.7978846] @test mm0_bin ≈ GLM.momentmatrix(gm_bin) rtol=1e-06 - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), wts=aweights(admit_agr.count)) - + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), + wts=aweights(admit_agr.count)) mm0_binw = [ -24.20695 0.00000 0.00000 0.00000 -56.36158 -56.36158 0.00000 0.00000 -36.86681 0.00000 -36.86681 0.00000 @@ -1685,27 +1659,13 @@ end -0.02585008 0.02585008 0.04168393 0.02585008 -0.02585008 0.02585008 0.02585008 0.05792112] - ## This is due to divverences between chol and qr @test vcov(gm_binw) ≈ Vcov rtol=1e-06 - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), wts=pweights(admit_agr.count)) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(); + 
wts=pweights(admit_agr.count), rto=1e-08) @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 - ## This are obtained from stata - ## glm admit i.rank [pweight=count], family(binomial) irls - #coef_stata = [] - #@test coef(gm_binw) ≈ coef_stata rtol=1e-05 - ## Stata: uses different residuals degrees of freedom. In this case (n-1) instead of (n-4) - ## Also need to give low tolerance (this small differences seem to be due to QR vs Cholesky) - #@test stderror(gm_binw)*sqrt(5/7) ≈ [] atol=1e-02 - - ## Stata is also off with fweights - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=fweights(admit_agr.count)) - ## vs Stata (here stata uses the same df) - stata_se = [.25693835, .30796933, .33538667, .4093073] - @test stderror(gm_binw) ≈ stata_se rtol = 1e-03 end - end include("analytic_weights.jl") \ No newline at end of file From b00dc165c564f4674a88321db99edec27c1ff115 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 15 Nov 2022 17:03:27 +0100 Subject: [PATCH 068/106] Follow reviewer's suggestions [Batch 2] --- src/lm.jl | 3 +- test/analytic_weights.jl | 79 ++++++++++++++++++++++++---------------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/src/lm.jl b/src/lm.jl index 37cca998..e7b080da 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -62,6 +62,7 @@ function deviance(r::LmResp{T,<:AbstractWeights}) where T end weights(r::LmResp) = r.wts +isweighted(r::LmResp) = weights(r) isa Union{AnalyticWeights, FrequencyWeights, ProbabilityWeights} nobs(r::LmResp{<:Any,W}) where {W<:FrequencyWeights} = sum(r.wts) nobs(r::LmResp{<:Any,W}) where {W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) @@ -83,7 +84,7 @@ end function residuals(r::LmResp; weighted::Bool=false) wts = weights(r) - if weighted && !isa(r.wts, UnitWeights) + if weighted && isweighted(r) sqrt.(wts) .* (r.y .- r.mu) else r.y .- r.mu diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl index fe969c86..7aad8a35 100644 --- 
a/test/analytic_weights.jl +++ b/test/analytic_weights.jl @@ -2,33 +2,33 @@ rng = StableRNG(123) x1 = rand(rng, 25) x2 = ifelse.(randn(rng, 25) .> 0, 1, 0) - y = ifelse.(0.004 .- 0.01 .* x1 .+ 1.5 .* x2 .+ randn(rng, 25) .> 0, 1, 0) w = rand(rng, 25) * 6 w = floor.(w) .+ 1 - - df = DataFrame(y=y, x1=x1, x2=x2, w=w) -clotting = DataFrame(u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), +clotting = DataFrame( + u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), lot1=[118, 58, 42, 35, 27, 25, 21, 19, 18], - w=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7]) + w=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7] +) quine.aweights = log.(3 .+ 3 .* quine.Days) quine.pweights = 1.0 ./ (quine.aweights ./ sum(quine.aweights)) quine.fweights = floor.(quine.aweights) -dobson = DataFrame(Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], +dobson = DataFrame( + Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], Outcome=categorical(repeat(string.('A':'C'), outer=3)), Treatment=categorical(repeat(string.('a':'c'), inner=3)), w=[1, 2, 1, 2, 3, 4, 3, 2, 1] ) - @testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), atol=1e-08,rtol=1e-08) + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), + atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 39.58120350785813 rtol = 1e-06 - @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 + @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 @test stderror(model) ≈ [0.9021013750843575, 2.063002891039618, 2337.217357530545] rtol = 1e-07 @test aic(model) ≈ 45.58120350785812 rtol = 1e-07 @@ -61,7 +61,8 @@ dobson = DataFrame(Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], end @testset "GLM: Binomial with ProbitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), 
wts=aweights(df.w), rtol=1e-09) + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), + wts=aweights(df.w), rtol=1e-09) @test deviance(model) ≈ 39.595360462143866 rtol = 1e-06 @test loglikelihood(model) ≈ -19.797680231071933 rtol = 1e-06 @test coef(model) ≈ [0.42120722997197313, 1.0416447141541567, 4.916910225354065] rtol = 1e-07 @@ -99,8 +100,6 @@ end model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=aweights(df.w), rtol=1e-08, atol=1e-08) @test deviance(model) ≈ 39.627559015619845 rtol = 1e-07 @test loglikelihood(model) ≈ -19.813779507809922 rtol = 1e-07 - - @test aic(model) ≈ 45.627559015619845 rtol = 1e-07 @test bic(model) ≈ 49.28418649022444 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ 1.003054020887253 0.1815783979426737 0.0; @@ -166,7 +165,8 @@ end end @testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=aweights(clotting.w), atol=1e-07, rtol=1e-08) + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), + wts=aweights(clotting.w), atol=1e-07, rtol=1e-08) @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @@ -185,7 +185,8 @@ end end @testset "GLM: Gamma with IdentityLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), wts=aweights(clotting.w), rtol=1e-16, atol=1e-16, minstepfac=0.00001) + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), + wts=aweights(clotting.w), rtol=1e-16, atol=1e-16, minstepfac=0.00001) @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 @@ -204,7 +205,8 @@ end end @testset "GLM: Gamma with LogLink link - AnalyticWeights" begin - 
model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), + wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.41206342934199663 rtol = 1e-07 @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 @@ -223,7 +225,8 @@ end end @testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), + wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @@ -242,7 +245,8 @@ end end @testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights" begin - model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), + wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.021377370485120707 rtol = 1e-07 @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 @@ -260,14 +264,18 @@ end 8424.676595366931 38797.069483575455] rtol = 1e-06 end -@testset "GLM: with LogLink link - AnalyticWeights" begin - model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), atol=1e-08, rtol=1e-08) - +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, 
NegativeBinomial(2), + LogLink(), wts=aweights(quine.aweights), atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 - @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, 0.3480161821981514] rtol = 1e-07 - @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, + -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, + 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, + 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, + 0.16126722793064027] rtol = 1e-07 ## Tests below are broken because dof(model)==8 instead of 7 @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 @@ -421,12 +429,17 @@ end ] rtol = 1e-04 end -@testset "GLM: with LogLink link - AnalyticWeights" begin - model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-08) +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, + NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-08) @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 - @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, 0.3480161821981514] rtol = 1e-07 - @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 
0.20252412726336674, 0.21060778935484836, 0.16126722793064027] rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, + -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, + 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, + 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, + 0.16126722793064027] rtol = 1e-07 @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ @@ -579,12 +592,17 @@ end ] rtol = 1e-04 end -@testset "GLM: with SqrtLink link - AnalyticWeights" begin - model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), SqrtLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-09) +@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights" begin + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), + SqrtLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-09) @test deviance(model) ≈ 626.6464732988984 rtol = 1e-07 @test loglikelihood(model) ≈ -2005.5355831034462 rtol = 1e-07 - @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, -0.9859743168046422, 0.2132095063819721, 0.7456070470961186, 0.5840284357554036] rtol = 1e-07 - @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, 0.3235019638755972] rtol = 1e-06 + @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, + -0.9859743168046422, 0.2132095063819721, 0.7456070470961186, + 0.5840284357554036] rtol = 1e-07 + @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, + 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, + 0.3235019638755972] rtol = 1e-06 @test_broken aic(model) ≈ 4025.0711662068925 rtol = 1e-07 @test_broken 
bic(model) ≈ 4045.956412558851 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ @@ -735,4 +753,3 @@ end 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 3.2758115948278728 3.2758115948278728 0.0 0.0 0.0 3.2758115948278728 0.0] rtol = 1e-04 end - From 0825324ff364afc8369c93053fb7486e5f61a5bc Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 15 Nov 2022 19:21:04 +0100 Subject: [PATCH 069/106] probability weights vcov uses momentmatrix --- src/linpred.jl | 3 ++- test/runtests.jl | 61 ++++++++++++------------------------------------ 2 files changed, 17 insertions(+), 47 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index fdae6684..cc9dde2d 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -270,7 +270,8 @@ vcov(x::DensePredChol{T, C, P}, u::AbstractVector, d::Real) where {T,C,P} = rmul vcov(x::SparsePredChol{T, C, M, P}, u::AbstractVector, d::Real) where {T,C,M,P} = rmul!(invchol(x), d) function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C} - Z = mul!(pp.scratchm1, Diagonal(u), pp.X) + #Z = #mul!(pp.scratchm1, Diagonal(u), pp.X) + Z = momentmatrix(pp) A = invchol(pp) if pp.chol isa CholeskyPivoted && rank(pp.chol) != size(A, 1) nancols = [all(isnan, col) for col in eachcol(A)] diff --git a/test/runtests.jl b/test/runtests.jl index ff7ff476..dd59ead7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -585,7 +585,8 @@ end # Logistic regression using aggregated data and weights admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), - rank = categorical(repeat(1:4, outer=2))) + rank = categorical(repeat(1:4, outer=2)) + ) @testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin for distr in (Binomial, Bernoulli) @@ -1579,7 +1580,10 @@ end end @testset "Binomial" begin f = @formula(admit ~ 1 + rank) - gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial()) + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, 
Binomial(); rtol=1e-8) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), + wts=aweights(admit_agr.count); rtol=1e-08) + mm0_bin = [-0.5 -0.0 -0.0 -0.0 -0.5 -0.5 -0.0 -0.0 -0.5 -0.0 -0.5 -0.0 @@ -1588,10 +1592,7 @@ end 0.5 0.5 0.0 0.0 0.5 0.0 0.5 0.0 0.5 0.0 0.0 0.5] - @test mm0_bin ≈ GLM.momentmatrix(gm_bin) - - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), - wts=aweights(admit_agr.count); rtol=1e-08) + mm0_binw = [-15.1475 -0.0 -0.0 -0.0 -34.6887 -34.6887 -0.0 -0.0 -21.5207 -0.0 -21.5207 -0.0 @@ -1600,37 +1601,18 @@ end 34.6887 34.6887 0.0 0.0 21.5207 0.0 21.5207 0.0 9.85075 0.0 0.0 9.85075] - + + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-03 - Vcov = [0.0660173 -0.0660173 -0.0660173 -0.0660173 - -0.0660173 0.0948451 0.0660173 0.0660173 - -0.0660173 0.0660173 0.112484 0.0660173 - -0.0660173 0.0660173 0.0660173 0.167532] - - ## This is due to differences between chol and qr - @test vcov(gm_binw) ≈ Vcov atol=1e-06 - - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(); - wts=pweights(admit_agr.count), rtol=1e-08) - @test mm0_binw ≈ GLM.momentmatrix(gm_binw) atol=1e-05 - ## This are obtained from stata - ## glm admit i.rank [pweight=count], family(binomial) irls - coef_stata = [.16430305, -.75002998, -1.364698, -1.6867296] - @test coef(gm_binw) ≈ coef_stata atol=1e-05 - ## Stata: uses different residuals degrees of freedom. 
In this case (n-1) instead of (n-4) - ## Also need to give low tolerance (this small differences seem to be due to QR vs Cholesky) - @test stderror(gm_binw)*sqrt(5/7) ≈ [1.5118579, 2.1380899, 2.1380899, 2.1380899] atol=1e-02 - - ## Stata is also off with fweights - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), wts=fweights(admit_agr.count)) - ## vs Stata (here stata uses the same df) - stata_se = [.25693835, .30796933, .33538667, .4093073] - @test stderror(gm_binw) ≈ stata_se atol = 0.001 + end @testset "Binomial ProbitLink" begin f = @formula(admit ~ 1 + rank) gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink()) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), + wts=aweights(admit_agr.count), rtol=1e-8) + mm0_bin = [-0.7978846 0.0000000 0.0000000 0.0000000 -0.7978846 -0.7978846 0.0000000 0.0000000 -0.7978846 0.0000000 -0.7978846 0.0000000 @@ -1639,10 +1621,7 @@ end 0.7978846 0.7978846 0.0000000 0.0000000 0.7978846 0.0000000 0.7978846 0.0000000 0.7978846 0.0000000 0.0000000 0.7978846] - @test mm0_bin ≈ GLM.momentmatrix(gm_bin) rtol=1e-06 - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), - wts=aweights(admit_agr.count)) mm0_binw = [ -24.20695 0.00000 0.00000 0.00000 -56.36158 -56.36158 0.00000 0.00000 -36.86681 0.00000 -36.86681 0.00000 @@ -1652,18 +1631,8 @@ end 36.86681 0.00000 36.86681 0.00000 17.52584 0.00000 0.00000 17.52584] - @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 - - Vcov =[ 0.02585008 -0.02585008 -0.02585008 -0.02585008 - -0.02585008 0.03677007 0.02585008 0.02585008 - -0.02585008 0.02585008 0.04168393 0.02585008 - -0.02585008 0.02585008 0.02585008 0.05792112] - - @test vcov(gm_binw) ≈ Vcov rtol=1e-06 - - gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(); - wts=pweights(admit_agr.count), rto=1e-08) - @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) rtol=1e-06 + @test 
mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 end end From 48d15fb29a562d7eb2e564f9167a9cfe49d881df Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 16 Nov 2022 23:43:47 +0100 Subject: [PATCH 070/106] Fix ProbabilityWeights vcov and tests --- src/glmfit.jl | 11 +- src/linpred.jl | 62 +++++---- src/lm.jl | 7 + test/analytic_weights.jl | 2 +- test/probability_weights.jl | 253 ++++++++++++++++++++++++++++++++++++ test/runtests.jl | 13 +- 6 files changed, 311 insertions(+), 37 deletions(-) create mode 100644 test/probability_weights.jl diff --git a/src/glmfit.jl b/src/glmfit.jl index 6bb07b86..a632cc72 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -779,12 +779,7 @@ momentmatrix(m::RegressionModel) = momentmatrix(m.model) function momentmatrix(m::GeneralizedLinearModel; weighted::Bool = isweighted(m)) X = modelmatrix(m; weighted=false) - r = m.rr.wrkwt .* m.rr.wrkresid - d = varstruct(m.rr, r) - return mul!(m.pp.scratchm1, Diagonal(r.*d), m.pp.X) + #r = m.rr.wrkwt .* m.rr.wrkresid + r, d = varstruct(m) + return mul!(m.pp.scratchm1, Diagonal(r.*d), X) end - -varstruct(rr::GlmResp{<:Any, <:Union{Normal, Poisson, Binomial, Bernoulli, NegativeBinomial}}, - r::AbstractArray) = 1 -varstruct(rr::GlmResp{<:Any, <:Union{Gamma, Geometric, InverseGaussian}}, - r::AbstractArray) = sum(rr.wrkwt)/sum(abs2, r) diff --git a/src/linpred.jl b/src/linpred.jl index cc9dde2d..2d3a30b1 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -255,24 +255,30 @@ invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), siz working_residuals(x::LinPredModel) = x.rr.wrkresid working_weights(x::LinPredModel) = x.rr.wrkwt +## Rewrite vcov +struct IID end +struct NIID end + function vcov(x::LinPredModel) - d = dispersion(x, true) - u = working_residuals(x).*working_weights(x) - V = vcov(x.pp, u, d) - if x.pp.wts isa ProbabilityWeights - V*(nobs(x)/dof_residual(x)) + if weights(x) isa ProbabilityWeights + ## df with ProbabilityWeights n-1 + vcov(NIID(), 
x).*dof_residual(x)/(nobs(x)-1) else - V + vcov(IID(), x) end end -vcov(x::DensePredChol{T, C, P}, u::AbstractVector, d::Real) where {T,C,P} = rmul!(invchol(x), d) -vcov(x::SparsePredChol{T, C, M, P}, u::AbstractVector, d::Real) where {T,C,M,P} = rmul!(invchol(x), d) +vcov(::IID, x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) + +function vcov(::NIID, x::LinPredModel) + s = nobs(x)/dof_residual(x) + mm = momentmatrix(x) + _, d = varstruct(x) + A = invchol(x.pp)./d + _vcov(x.pp, mm, A).*s +end -function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C} - #Z = #mul!(pp.scratchm1, Diagonal(u), pp.X) - Z = momentmatrix(pp) - A = invchol(pp) +function _vcov(pp::DensePredChol, Z::Matrix, A::Matrix) if pp.chol isa CholeskyPivoted && rank(pp.chol) != size(A, 1) nancols = [all(isnan, col) for col in eachcol(A)] nnancols = .!nancols @@ -280,26 +286,23 @@ function vcov(pp::DensePredChol{T, C, <:ProbabilityWeights}, u::AbstractVector, B = Zv'Zv Av = view(A, nnancols, nnancols) V = similar(pp.scratchm2) - V[nnancols, nnancols] = Av*B*Av + V[nnancols, nnancols] = Av * B * Av V[nancols, :] .= NaN V[:, nancols] .= NaN else B = mul!(pp.scratchm2, Z', Z) - V = A*B*A + V = A * B * A end return V end - -function vcov(pp::SparsePredChol{T, C, M, <:ProbabilityWeights}, u::AbstractVector, d::Real) where {T, C, M} - ## Note: SparsePredChol does not handle rankdeficient cases - Z = mul!(pp.scratchm1, Diagonal(u), pp.X) - A = invchol(pp) - B = Z'*Z - V = A*B*A - return V + +function _vcov(pp::SparsePredChol, Z::Matrix, A::Matrix) + ## SparsePredChol does not handle rankdeficient cases + B = Z'*Z + V = A*B*A + return V end - function cor(x::LinPredModel) Σ = vcov(x) invstd = inv.(sqrt.(diag(Σ))) @@ -316,7 +319,7 @@ modelframe(obj::LinPredModel) = obj.fr modelmatrix(obj::LinPredModel; weighted::Bool=isweighted(obj)) = modelmatrix(obj.pp; weighted=weighted) -function modelmatrix(pp::LinPred; weighted::Bool=isweighted(obj)) +function 
modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) Z = if weighted mul!(pp.scratchm1, Diagonal(sqrt.(pp.wts)), pp.X) else @@ -366,3 +369,14 @@ coef(obj::LinPredModel) = coef(obj.pp) dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) + +function varstruct(x::LinPredModel) + wrkwt = working_weights(x) + wrkres = working_residuals(x) + r = wrkwt .* wrkres + if x.rr.d isa Union{Gamma, Geometric, InverseGaussian} + r, sum(wrkwt)/sum(abs2, r) + else + r, 1.0 + end +end \ No newline at end of file diff --git a/src/lm.jl b/src/lm.jl index e7b080da..1b192bbe 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -335,6 +335,13 @@ function momentmatrix(m::LinearModel; weighted=isweighted(m)) end end +function varstruct(x::LinearModel) + wrkwt = working_weights(x) + wrkres = working_residuals(x) + r = wrkwt .* wrkres + return r, 1.0 +end + """ cooksdistance(obj::LinearModel) diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl index 7aad8a35..c15c704e 100644 --- a/test/analytic_weights.jl +++ b/test/analytic_weights.jl @@ -28,7 +28,7 @@ dobson = DataFrame( model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 39.58120350785813 rtol = 1e-06 - @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 + @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 @test stderror(model) ≈ [0.9021013750843575, 2.063002891039618, 2337.217357530545] rtol = 1e-07 @test aic(model) ≈ 45.58120350785812 rtol = 1e-07 diff --git a/test/probability_weights.jl b/test/probability_weights.jl new file mode 100644 index 00000000..2664203e --- /dev/null +++ b/test/probability_weights.jl @@ -0,0 +1,253 @@ +rng = StableRNG(123) +x1 = rand(rng, 50) +x2 = ifelse.(randn(rng, 50) .> 0, 1, 0) +y = ifelse.(0.004 .- 0.01 .* x1 
.+ 1.5 .* x2 .+ randn(rng, 50) .> 0, 1, 0) +w = rand(rng, 50) * 6 +w = floor.(w) .+ 1 +df = DataFrame(y = y, x1 = x1, x2 = x2, w = w) +df.pweights = size(df, 1) .* (df.w ./ sum(df.w)) + +clotting = DataFrame( + u = log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), + lot1 = [118, 58, 42, 35, 27, 25, 21, 19, 18], + w = [1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7], +) + +clotting.pweights = (clotting.w ./ sum(clotting.w)) + +quine = RDatasets.dataset("MASS", "quine") +quine.aweights = log.(3 .+ 3 .* quine.Days) +quine.pweights = size(quine, 1) .* (quine.aweights ./ sum(quine.aweights)) + +dobson = DataFrame( + Counts = [18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome = categorical(repeat(string.('A':'C'), outer = 3)), + Treatment = categorical(repeat(string.('a':'c'), inner = 3)), + w = [1, 2, 1, 2, 3, 4, 3, 2, 1], +) + +dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) + +@testset "GLM: Binomial with LogitLink link - ProbabilityWeights" begin + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + LogitLink(), + wts = pweights(df.pweights), + rtol = 1e-07, + ) + @test deviance(model) ≈ 47.311214978934785 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.5241460813701, 0.14468927249342, 2.487500063309] rtol = 1e-06 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test_broken dof_residual(model) == 47.0 + @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 +end + +@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights" begin + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + ProbitLink(), + wts = pweights(df.pweights), + rtol = 1e-09, + ) + @test deviance(model) ≈ 47.280413566179 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.379823362118, 0.17460125170132, 1.4927538978259] rtol = 1e-07 + ## Test broken because of 
https://github.com/JuliaStats/GLM.jl/issues/509 + @test_broken dof_residual(model) == 47.0 + @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 +end + +@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights" begin + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + CauchitLink(), + wts = pweights(df.pweights), + rtol = 1e-07, + ) + @test deviance(model) ≈ 47.17915872474391 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.007674579802284, -0.5378132620063, 2.994759904353] rtol = 1e-06 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test_broken dof_residual(model) == 47.0 + @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 +end + +@testset "GLM: Binomial with CloglogLink link - ProbabilityWeights" begin + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + CloglogLink(), + wts = pweights(df.pweights), + rtol = 1e-09, + ) + @test deviance(model) ≈ 47.063354817529856 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.9897210433718, 0.449902058467, 1.5467108410611] rtol = 1e-07 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test_broken dof_residual(model) == 47.0 + @test stderror(model) ≈ [0.647026270959, 0.74668663622095, 0.49056337945919] rtol = 1e-04 +end + +@testset "GLM: Gamma with LogLink link - ProbabilityWeights" begin + model = glm( + @formula(lot1 ~ 1 + u), + clotting, + Gamma(), + LogLink(), + wts = pweights(clotting.pweights), + rtol = 1e-12, + atol = 1e-9, + ) + @test deviance(model) ≈ 0.012601328117859285 rtol = 1e-07 + @test nulldeviance(model) ≈ 0.28335799805430917 rtol = 1e-07 + @test coef(model) ≈ [5.325098274654255, -0.5495659110653159] rtol = 1e-5 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test dof_residual(model) == 7.0 + @test stderror(model) 
≈ [0.2651749940925478, 0.06706321966020713] rtol = 1e-07 +end + +@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights" begin + model = glm( + @formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + LogLink(), + wts = pweights(quine.pweights), + atol = 1e-09, + ) + @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [ + 3.0241191551553044, + -0.46415766516885565, + 0.07185609429925505, + -0.47848540911607695, + 0.09677889908013788, + 0.3562972562034377, + 0.34801618219815034, + ] rtol = 1e-04 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test dof_residual(model) == 139.0 + @test_broken stderror(model) ≈ [ + 0.20080246284436692, + 0.14068933863735536, + 0.1440710375321996, + 0.2533527583247213, + 0.2401168459633955, + 0.23210823521812646, + 0.19039099362430775, + ] rtol = 1e-05 + @test stderror(model) ≈ [ + 0.20080246284436692, + 0.14068933863735536, + 0.1440710375321996, + 0.2533527583247213, + 0.2401168459633955, + 0.23210823521812646, + 0.19039099362430775, + ] rtol = 1e-04 +end + +@testset "GLM: with LogLink link - ProbabilityWeights" begin + model = glm( + @formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + LogLink(), + wts = pweights(quine.pweights), + rtol = 1e-09, + ) + @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [ + 3.0241191551553044, + -0.46415766516885565, + 0.07185609429925505, + -0.47848540911607695, + 0.09677889908013788, + 0.3562972562034377, + 0.34801618219815034, + ] rtol = 1e-04 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + ## here the test is actually right! 
+ ## @test_broken dof_residual(model) == 139.0 + @test stderror(model) ≈ [ + 0.20080246284436692, + 0.14068933863735536, + 0.1440710375321996, + 0.2533527583247213, + 0.2401168459633955, + 0.23210823521812646, + 0.19039099362430775, + ] rtol = 1e-04 +end + +@testset "GLM: NegaiveBinomial(2) with SqrtLink link - ProbabilityWeights" begin + model = glm( + @formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + SqrtLink(), + wts = pweights(quine.pweights), + rtol = 1e-08, + ) + @test deviance(model) ≈ 178.99970038364276 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [ + 4.733877229152367, + -1.0079778954713488, + 0.025223928185488836, + -0.985974316804644, + 0.2132095063819702, + 0.7456070470961171, + 0.5840284357554048, + ] rtol = 1e-07 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + ## This is right, by mistake! + ## @test_broken dof_residual(model) == 139.0 + @test stderror(model) ≈ [ + 0.4156607040373307, + 0.30174203746555045, + 0.30609799754882105, + 0.526030598769091, + 0.5384102946567921, + 0.5328456049279787, + 0.4065359817407846, + ] rtol = 1e-04 +end + +@testset "GLM: Poisson with LogLink link - ProbabilityWeights" begin + model = glm( + @formula(Counts ~ 1 + Outcome + Treatment), + dobson, + Poisson(), + LogLink(), + wts = pweights(dobson.pweights), + ) + @test deviance(model) ≈ 4.837327189925912 rtol = 1e-07 + @test nulldeviance(model) ≈ 12.722836814903907 rtol = 1e-07 + @test coef(model) ≈ [ + 3.1097109912423444, + -0.5376892683400354, + -0.19731134600684794, + -0.05011966661241072, + 0.010415729161988225, + ] rtol = 1e-07 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test_broken dof_residual(model) == 4.0 + @test stderror(model) ≈ [ + 0.15474638805584298, + 0.13467582259453692, + 0.1482320418486368, + 0.17141304156534284, + 0.17488650070332398, + ] rtol = 1e-06 +end diff --git a/test/runtests.jl b/test/runtests.jl index 
dd59ead7..d9cfb5ab 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -127,9 +127,13 @@ end lm_model = lm(f, df, wts = pweights(df.weights)) glm_model = glm(f, df, Normal(), wts = pweights(df.weights)) - ## Standard errors from STATA - @test stderror(lm_model) ≈ [ 47.22671, .0517617] atol=1e-05 - @test stderror(glm_model) ≈ [ 47.22671, .0517617] atol=1e-05 + ## Standard errors from svyglm + ## Note: Stata and R use different degrees of freedom adjustments + ## 1. State uses dof_residual + ## 2. R uses n-1 + ## We follow R + @test stderror(lm_model) ≈ [47.1257, .0516509] atol=1e-05 + @test stderror(glm_model) ≈ [47.1257, .0516509] atol=1e-05 ## Test the non full rank case df.Income2 = df.Income*2 df.Income3 = df.Income*3 @@ -1637,4 +1641,5 @@ end end -include("analytic_weights.jl") \ No newline at end of file +include("analytic_weights.jl") +include("probability_weights.jl") \ No newline at end of file From 3338eabda601b933c1845cbc950ad6e4b9c12c5c Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 17 Nov 2022 09:58:39 +0100 Subject: [PATCH 071/106] Use leverage from StasAPI --- src/GLM.jl | 5 +++-- src/glmfit.jl | 3 ++- src/linpred.jl | 9 +++++---- src/lm.jl | 2 +- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index 188d251c..3afe8e05 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -12,7 +12,8 @@ module GLM import Statistics: cor import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, - fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights + fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², + PValue, weights, leverage import StatsFuns: xlogy import SpecialFunctions: erfc, erfcinv, digamma, trigamma import StatsModels: hasintercept @@ -20,7 +21,7 @@ module GLM loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted, fit, fit!, 
model_response, response, modelmatrix, r2, r², adjr2, adjr², cooksdistance, hasintercept, dispersion, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights, - UnitWeights, uweights, fweights, pweights, aweights + UnitWeights, uweights, fweights, pweights, aweights, leverage export # types diff --git a/src/glmfit.jl b/src/glmfit.jl index a632cc72..f150990d 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -774,8 +774,9 @@ function residuals(r::GlmResp; weighted::Bool=false) return dres end -## To be removed once StasAPI PR# is merged +## momentmatrix(m::RegressionModel) = momentmatrix(m.model) +leverage(m::RegressionModel) = leverage(m.model) function momentmatrix(m::GeneralizedLinearModel; weighted::Bool = isweighted(m)) X = modelmatrix(m; weighted=false) diff --git a/src/linpred.jl b/src/linpred.jl index 2d3a30b1..2efc9226 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -328,9 +328,10 @@ function modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) return Z end -hatvalues(x::LinPredModel) = hatvalues(x.pp) +leverage(x::LinPredModel) = leverage(x.pp) -function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} + +function leverage(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} X = modelmatrix(pp; weighted=isweighted(pp)) _, k = size(X) ch = pp.chol @@ -340,7 +341,7 @@ function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} sum(x -> x^2, view(X, :, 1:rnk)/ch.U[1:rnk, idx], dims=2) end -function hatvalues(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} +function leverage(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} X = modelmatrix(pp; weighted=isweighted(pp)) sum(x -> x^2, X/pp.chol.U, dims=2) end @@ -379,4 +380,4 @@ function varstruct(x::LinPredModel) else r, 1.0 end -end \ No newline at end of file +end diff --git a/src/lm.jl b/src/lm.jl index 1b192bbe..5dba4eea 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -359,7 +359,7 @@ function StatsBase.cooksdistance(obj::LinearModel) u = 
residuals(obj; weighted=isweighted(obj)) mse = GLM.dispersion(obj,true) k = dof(obj)-1 - hii = hatvalues(obj) + hii = leverage(obj) D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end From 970e26ef870f6249ae165c53b46d063e5242b6f2 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 17 Nov 2022 22:50:32 +0100 Subject: [PATCH 072/106] Rebase against master --- src/glmfit.jl | 9 +++++++-- src/linpred.jl | 23 +++++------------------ src/lm.jl | 2 ++ 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index c0bd30bb..abdfca7d 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -602,7 +602,7 @@ function fit(::Type{M}, throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) end rr = GlmResp(y, d, l, offset, _wts) - res = M(rr, cholpred(X, false, _wts), false) + res = M(rr, cholpred(X, dropcollinear, _wts), false) return dofit ? fit!(res; fitargs...) : res end @@ -789,11 +789,16 @@ end ## momentmatrix(m::RegressionModel) = momentmatrix(m.model) +invloglikhessian(m::RegressionModel) = invloglikhessian(m.model) leverage(m::RegressionModel) = leverage(m.model) function momentmatrix(m::GeneralizedLinearModel; weighted::Bool = isweighted(m)) X = modelmatrix(m; weighted=false) - #r = m.rr.wrkwt .* m.rr.wrkresid r, d = varstruct(m) return mul!(m.pp.scratchm1, Diagonal(r.*d), X) end + +function invloglikhessian(m::GeneralizedLinearModel) + r, d = varstruct(m) + return invchol(m.pp)/d +end diff --git a/src/linpred.jl b/src/linpred.jl index c2a0cf24..b9312713 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -278,29 +278,18 @@ invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), siz working_residuals(x::LinPredModel) = x.rr.wrkresid working_weights(x::LinPredModel) = x.rr.wrkwt -## Rewrite vcov -struct IID end -struct NIID end - function vcov(x::LinPredModel) if weights(x) isa ProbabilityWeights ## df with ProbabilityWeights n-1 - vcov(NIID(), x).*dof_residual(x)/(nobs(x)-1) + s 
= nobs(x)/dof_residual(x) + mm = momentmatrix(x) + A = invloglikhessian(x) + _vcov(x.pp, mm, A).*s else - vcov(IID(), x) + rmul!(invchol(x.pp), dispersion(x, true)) end end -vcov(::IID, x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true)) - -function vcov(::NIID, x::LinPredModel) - s = nobs(x)/dof_residual(x) - mm = momentmatrix(x) - _, d = varstruct(x) - A = invchol(x.pp)./d - _vcov(x.pp, mm, A).*s -end - function _vcov(pp::DensePredChol, Z::Matrix, A::Matrix) if pp.chol isa CholeskyPivoted && rank(pp.chol) != size(A, 1) nancols = [all(isnan, col) for col in eachcol(A)] @@ -353,7 +342,6 @@ end leverage(x::LinPredModel) = leverage(x.pp) - function leverage(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} X = modelmatrix(pp; weighted=isweighted(pp)) _, k = size(X) @@ -369,7 +357,6 @@ function leverage(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} sum(x -> x^2, X/pp.chol.U, dims=2) end - response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu diff --git a/src/lm.jl b/src/lm.jl index d60c7837..42cce58b 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -333,6 +333,8 @@ function momentmatrix(m::LinearModel; weighted=isweighted(m)) end end +invloglikhessian(m::LinearModel) = invchol(m.pp) + function varstruct(x::LinearModel) wrkwt = working_weights(x) wrkres = working_residuals(x) From 8832e9daeb29ad94a0668c78f62a1d38b5db15ba Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 17 Nov 2022 23:49:08 +0100 Subject: [PATCH 073/106] Fix test --- src/linpred.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index b9312713..dc4f0061 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -280,8 +280,8 @@ working_weights(x::LinPredModel) = x.rr.wrkwt function vcov(x::LinPredModel) if weights(x) isa ProbabilityWeights - ## df with ProbabilityWeights n-1 - s = nobs(x)/dof_residual(x) + ## df with ProbabilityWeights n-1 + s = nobs(x)/(nobs(x) - 1) mm = momentmatrix(x) A = 
invloglikhessian(x) _vcov(x.pp, mm, A).*s From 587c129c40b118585dd859936ba2d82aa55c037a Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 20 Dec 2022 18:09:12 +0100 Subject: [PATCH 074/106] Test on 1.6 --- .github/workflows/CI-stable.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/CI-stable.yml b/.github/workflows/CI-stable.yml index 89bb1683..50663d18 100644 --- a/.github/workflows/CI-stable.yml +++ b/.github/workflows/CI-stable.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - version: ['1.0', '1'] + version: ['1.6', '1'] os: ['ubuntu-latest', 'macos-latest', 'windows-latest'] arch: ['x64'] steps: From fa63a9aa27f6e0da7ca6b9beeb26fe2407633da0 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 29 Dec 2022 23:20:00 +0100 Subject: [PATCH 075/106] Address reviwer comments --- src/glmfit.jl | 38 +++++++++++++++++++++---------------- src/linpred.jl | 27 ++++++++------------------ src/lm.jl | 19 ++++++++++--------- test/probability_weights.jl | 13 +++++++------ test/runtests.jl | 5 ++--- 5 files changed, 49 insertions(+), 53 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index a226cb69..dd0661fa 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -106,7 +106,7 @@ function updateμ!(r::GlmResp{T,D,L,<:AbstractWeights}, linPr::T) where {T<:FPVe updateμ!(r) if isweighted(r) map!(*, r.devresid, r.devresid, r.wts) - map!(*, r.wrkwt, r.wrkwt, r.wts) + map!(*, r.wrkwt, r.wrkwt, r.wts) end return r end @@ -274,7 +274,7 @@ function nulldeviance(m::GeneralizedLinearModel) d = r.d offset = r.offset hasint = hasintercept(m) - dev = zero(eltype(y)) + dev = zero(eltype(y)) if isempty(offset) # Faster method if isweighted(m) mu = hasint ? 
@@ -306,7 +306,7 @@ loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} y = r.y mu = r.mu - wts = weights(r) + wts = weights(r) d = r.d ll = zero(eltype(mu)) n = nobs(r) @@ -325,7 +325,7 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} else @inbounds for i in eachindex(y, mu, wts) throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) - end + end end return ll end @@ -351,7 +351,7 @@ function nullloglikelihood(m::GeneralizedLinearModel) else @inbounds for i in eachindex(y, wts) ll += loglik_apweights_obs(d, y[i], mu, wts[i], δ, sumwt, N) - end + end end else X = fill(1.0, length(y), hasint ? 1 : 0) @@ -578,7 +578,7 @@ function fit(::Type{M}, wts::AbstractVector{<:Real} = uweights(length(y)), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractGLM} - + if dofit === nothing dofit = true else @@ -647,7 +647,7 @@ function fit(::Type{M}, end off = offset === nothing ? similar(y, 0) : offset - + rr = GlmResp(y, d, l, off, _wts) res = M(rr, cholpred(X, dropcollinear, _wts), f, false) return dofit ? fit!(res; fitargs...) 
: res @@ -863,12 +863,12 @@ nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts) function residuals(r::GlmResp; weighted::Bool=false) y, η, μ = r.y, r.eta, r.mu - dres = similar(μ) + dres = similar(μ) @inbounds for i in eachindex(y, μ) - μi = μ[i] + μi = μ[i] yi = y[i] - dres[i] = sqrt(max(0, devresid(r.d, yi, μi)))*sign(yi-μi) + dres[i] = sqrt(max(0, devresid(r.d, yi, μi)))*sign(yi-μi) end if weighted @@ -878,17 +878,23 @@ function residuals(r::GlmResp; weighted::Bool=false) return dres end -## -# momentmatrix(m::LinPredModel) = momentmatrix(m.model) -# invloglikhessian(m::LinPredModel) = invloglikhessian(m.model) -# leverage(m::LinPredModel) = leverage(m.model) - -function momentmatrix(m::GeneralizedLinearModel; weighted::Bool = isweighted(m)) +function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) r, d = varstruct(m) return mul!(m.pp.scratchm1, Diagonal(r.*d), X) end +function varstruct(x::GeneralizedLinearModel) + wrkwt = working_weights(x) + wrkres = working_residuals(x) + r = wrkwt .* wrkres + if x.rr.d isa Union{Gamma, Geometric, InverseGaussian} + r, sum(wrkwt)/sum(abs2, r) + else + r, 1.0 + end +end + function invloglikhessian(m::GeneralizedLinearModel) r, d = varstruct(m) return invchol(m.pp)/d diff --git a/src/linpred.jl b/src/linpred.jl index 63a7950b..84fb9097 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -136,13 +136,13 @@ function cholesky(p::DensePredChol{T}) where T<:FP end cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0) -function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal X = p.wts isa UnitWeights ? 
p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal ch = p.chol X = p.wts isa UnitWeights ? p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) delbeta = mul!(p.delbeta, adjoint(X), r) @@ -280,7 +280,8 @@ working_weights(x::LinPredModel) = x.rr.wrkwt function vcov(x::LinPredModel) if weights(x) isa ProbabilityWeights - ## df with ProbabilityWeights n-1 + ## n-1 degrees of freedom - This is coherent with the `R` package `survey`, + ## `STATA` uses n-k s = nobs(x)/(nobs(x) - 1) mm = momentmatrix(x) A = invloglikhessian(x) @@ -309,10 +310,10 @@ function _vcov(pp::DensePredChol, Z::Matrix, A::Matrix) end function _vcov(pp::SparsePredChol, Z::Matrix, A::Matrix) - ## SparsePredChol does not handle rankdeficient cases - B = Z'*Z - V = A*B*A - return V + ## SparsePredChol does not handle rankdeficient cases + B = Z'*Z + V = A*B*A + return V end function cor(x::LinPredModel) @@ -402,18 +403,6 @@ dof_residual(obj::LinPredModel) = nobs(obj) - dof(obj) + 1 hasintercept(m::LinPredModel) = any(i -> all(==(1), view(m.pp.X , :, i)), 1:size(m.pp.X, 2)) - -function varstruct(x::LinPredModel) - wrkwt = working_weights(x) - wrkres = working_residuals(x) - r = wrkwt .* wrkres - if x.rr.d isa Union{Gamma, Geometric, InverseGaussian} - r, sum(wrkwt)/sum(abs2, r) - else - r, 1.0 - end -end - linpred_rank(x::LinPred) = length(x.beta0) linpred_rank(x::DensePredChol{<:Any, <:CholeskyPivoted}) = x.chol.rank diff --git a/src/lm.jl b/src/lm.jl index 070ec2fc..13b8e618 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -43,7 +43,7 @@ end updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr))) -function deviance(r::LmResp{T,<:AbstractWeights}) where T +function deviance(r::LmResp) where T y = r.y mu = r.mu 
wts = r.wts @@ -72,10 +72,10 @@ function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) wher -n/2 * (log(2π * deviance(r)/n) + 1) end -function loglikelihood(r::LmResp{T,<:AbstractWeights}) where T +function loglikelihood(r::LmResp{T,<:AnalyticWeights}) where T N = length(r.y) n = sum(log, weights(r)) - 0.5*(n - N * (log(2π * deviance(r)/N) + 1)) + (n - N * (log(2π * deviance(r)/N) + 1))/2 end function loglikelihood(r::LmResp{T,<:ProbabilityWeights}) where T @@ -369,13 +369,14 @@ function confint(obj::LinearModel; level::Real=0.95) quantile(TDist(dof_residual(obj)), (1. - level)/2.) * [1. -1.] end -function momentmatrix(m::LinearModel; weighted=isweighted(m)) +function momentmatrix(m::LinearModel) X = modelmatrix(m; weighted=false) r = residuals(m; weighted=false) - if weighted && isweighted(m) - return X .* r .* weights(m) + mm = X .* r + if isweighted(m) + mm .* weights(m) else - return X .* r + mm end end @@ -385,7 +386,7 @@ function varstruct(x::LinearModel) wrkwt = working_weights(x) wrkres = working_residuals(x) r = wrkwt .* wrkres - return r, 1.0 + return r, one(eltype(r)) end """ @@ -406,6 +407,6 @@ function StatsBase.cooksdistance(obj::LinearModel) mse = GLM.dispersion(obj,true) k = dof(obj)-1 hii = leverage(obj) - D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) + D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end diff --git a/test/probability_weights.jl b/test/probability_weights.jl index 2664203e..19aaa333 100644 --- a/test/probability_weights.jl +++ b/test/probability_weights.jl @@ -176,9 +176,9 @@ end 0.3562972562034377, 0.34801618219815034, ] rtol = 1e-04 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - ## here the test is actually right! 
- ## @test_broken dof_residual(model) == 139.0 + ## Test shouldbe broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + ## but since negbinomial is correct, by mistake + @test dof_residual(model) == 139.0 @test stderror(model) ≈ [ 0.20080246284436692, 0.14068933863735536, @@ -210,9 +210,10 @@ end 0.7456070470961171, 0.5840284357554048, ] rtol = 1e-07 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - ## This is right, by mistake! - ## @test_broken dof_residual(model) == 139.0 + + ## Test should be broken because of https://github.com/JuliaStats/GLM.jl/issues/509. + ## However, in the negative binomial case the test passes + @test dof_residual(model) == 139.0 @test stderror(model) ≈ [ 0.4156607040373307, 0.30174203746555045, diff --git a/test/runtests.jl b/test/runtests.jl index 12759eaa..97fa81b8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -591,8 +591,7 @@ end # Logistic regression using aggregated data and weights admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), - rank = categorical(repeat(1:4, outer=2)) - ) + rank = categorical(repeat(1:4, outer=2))) @testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin for distr in (Binomial, Bernoulli) @@ -910,7 +909,7 @@ end @test isapprox(deviance(gmsparse), deviance(gmdense)) @test isapprox(coef(gmsparse), coef(gmdense)) @test isapprox(vcov(gmsparse), vcov(gmdense)) - @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + @test isapprox(modelmatrix(gmsparse; weighted=true), modelmatrix(gmdense; weighted=true)) end end end From ba52ce9aced0e634773f43a70c0a9e7d97b21339 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 19 Nov 2024 22:32:35 +0100 Subject: [PATCH 076/106] Merge from origin --- docs/src/api.md | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/docs/src/api.md b/docs/src/api.md index 6ea00953..99ff2a24 
100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -21,7 +21,7 @@ GLM.ModResp ## Constructors for models The most general approach to fitting a model is with the `fit` function, as in -```jldoctest +```jldoctest constructors julia> using RDatasets julia> df = RDatasets.dataset("mlmRev", "Oxboys"); @@ -39,19 +39,17 @@ x2 6.52102 0.816987 7.98 <1e-13 4.91136 8.13068 ``` This model can also be fit as -```jldoctest -julia> using GLM, StableRNGs - -julia> lm(hcat(ones(10), 1:10), randn(MersenneTwister(12321), 10)) +```jldoctest constructors +julia> lm(hcat(ones(nrow(df)), df.Age), df.Height) LinearModel Coefficients: -──────────────────────────────────────────────────────────────── - Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% -──────────────────────────────────────────────────────────────── -x1 0.361896 0.69896 0.52 0.6186 -1.24991 1.9737 -x2 -0.012125 0.112648 -0.11 0.9169 -0.271891 0.247641 -──────────────────────────────────────────────────────────────── +───────────────────────────────────────────────────────────────── + Coef. Std. 
Error t Pr(>|t|) Lower 95% Upper 95% +───────────────────────────────────────────────────────────────── +x1 149.372 0.528565 282.60 <1e-99 148.33 150.413 +x2 6.52102 0.816987 7.98 <1e-13 4.91136 8.13068 +───────────────────────────────────────────────────────────────── ``` ```@docs From 5e790dfc9477a55ea73df6a1d20fad493002d34d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 19 Nov 2024 22:40:41 +0100 Subject: [PATCH 077/106] Fix broken test of dof_residual --- test/probability_weights.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/probability_weights.jl b/test/probability_weights.jl index 19aaa333..0e6e6750 100644 --- a/test/probability_weights.jl +++ b/test/probability_weights.jl @@ -41,7 +41,9 @@ dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.5241460813701, 0.14468927249342, 2.487500063309] rtol = 1e-06 ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - @test_broken dof_residual(model) == 47.0 + ## @test_broken dof_residual(model) == 47.0 + ## It has now been fixed + @test dof_residual(model) == 47.0 @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 end From 50c1a969b49ed753af1f85107656bd9b6def5b0e Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 19 Nov 2024 23:00:43 +0100 Subject: [PATCH 078/106] Fix testing issues --- docs/src/index.md | 4 ++-- test/analytic_weights.jl | 26 +++++++++++++------------- test/probability_weights.jl | 10 +++++----- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index ed831fdb..8f2c6876 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -223,8 +223,8 @@ julia> loglikelihood(m_aweights) julia> loglikelihood(m_fweights) -25.51860961756451 -julia> loglikelihood(m_pweights) --16.296307561384253 +#julia> loglikelihood(m_pweights) +#-16.296307561384253 ``` ## 
Comparing models with F-test diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl index c15c704e..d5cd379e 100644 --- a/test/analytic_weights.jl +++ b/test/analytic_weights.jl @@ -186,13 +186,13 @@ end @testset "GLM: Gamma with IdentityLink link - AnalyticWeights" begin model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), - wts=aweights(clotting.w), rtol=1e-16, atol=1e-16, minstepfac=0.00001) + wts=aweights(clotting.w), rtol=1e-10, atol=1e-10, minstepfac=0.00001) @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 @test stderror(model) ≈ [16.07962739541372, 3.766841480457265] rtol = 1e-05 - @test aic(model) ≈ 208.39832253294642 rtol = 1e-07 - @test bic(model) ≈ 208.9899962649551 rtol = 1e-07 + @test GLM.aic(model) ≈ 208.39832253294642 rtol = 1e-07 + @test GLM.bic(model) ≈ 208.9899962649551 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ 0.26061914480947884 0.4194503323625281; 0.06148544891860896 0.14157547811603585; -0.019061929106842457 -0.051620660951180786; @@ -211,8 +211,8 @@ end @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 @test stderror(model) ≈ [0.20287310816341905, 0.053062600599660774] rtol = 1e-07 - @test aic(model) ≈ 169.59554492495064 rtol = 1e-07 - @test bic(model) ≈ 170.18721865695932 rtol = 1e-07 + @test GLM.aic(model) ≈ 169.59554492495064 rtol = 1e-07 + @test GLM.bic(model) ≈ 170.18721865695932 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ 14.39716447431257 23.171342336508012; 0.0374983950207553 0.0863432453859933; -2.5490869750808054 -6.903055495494598; @@ -231,8 +231,8 @@ end @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 
1e-07 - @test aic(model) ≈ 92.71815757538103 rtol = 1e-07 - @test bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test GLM.bic(model) ≈ 93.30983130738969 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ 1900.1063511093867 3058.103199132267; -1643.317155973023 -3783.877586404854; -420.13783432322964 -1137.7543467296691; @@ -251,8 +251,8 @@ end @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 @test stderror(model) ≈ [0.00016779409928094252, 9.025235597677238e-5] rtol = 1e-07 - @test aic(model) ≈ 179.65093330155722 rtol = 1e-07 - @test bic(model) ≈ 180.2426070335659 rtol = 1e-07 + @test GLM.aic(model) ≈ 179.65093330155722 rtol = 1e-07 + @test GLM.bic(model) ≈ 180.2426070335659 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ 28815.030725087538 46376.00289690935; -21039.070620903 -48444.250382140235; -6195.618377983015 -16778.045594449453; @@ -440,8 +440,8 @@ end @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, 0.16126722793064027] rtol = 1e-07 - @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 - @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test_broken GLM.aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken GLM.bic(model) ≈ 4044.073139216514 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ -3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 @@ -603,8 +603,8 @@ end @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 0.29612422536777805, 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, 0.3235019638755972] rtol = 1e-06 - @test_broken aic(model) ≈ 4025.0711662068925 rtol = 1e-07 - @test_broken bic(model) ≈ 4045.956412558851 rtol = 1e-07 + @test_broken GLM.aic(model) ≈ 
4025.0711662068925 rtol = 1e-07 + @test_broken GLM.bic(model) ≈ 4045.956412558851 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ -1.4294351675636041 -0.0 -1.4294351675636041 -0.0 -0.0 -0.0 -1.4294351675636041 -1.5410055711037194 -0.0 -1.5410055711037194 -0.0 -0.0 -0.0 -1.5410055711037194 diff --git a/test/probability_weights.jl b/test/probability_weights.jl index 0e6e6750..3c1b2ab7 100644 --- a/test/probability_weights.jl +++ b/test/probability_weights.jl @@ -60,7 +60,7 @@ end @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.379823362118, 0.17460125170132, 1.4927538978259] rtol = 1e-07 ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - @test_broken dof_residual(model) == 47.0 + @test dof_residual(model) == 47.0 @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 end @@ -77,7 +77,7 @@ end @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.007674579802284, -0.5378132620063, 2.994759904353] rtol = 1e-06 ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - @test_broken dof_residual(model) == 47.0 + @test dof_residual(model) == 47.0 @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 end @@ -94,7 +94,7 @@ end @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.9897210433718, 0.449902058467, 1.5467108410611] rtol = 1e-07 ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - @test_broken dof_residual(model) == 47.0 + @test dof_residual(model) == 47.0 @test stderror(model) ≈ [0.647026270959, 0.74668663622095, 0.49056337945919] rtol = 1e-04 end @@ -180,7 +180,7 @@ end ] rtol = 1e-04 ## Test shouldbe broken because of https://github.com/JuliaStats/GLM.jl/issues/509 ## but since negbinomial is correct, by mistake - @test dof_residual(model) == 139.0 + @test dof_residual(model) == 139.0 @test stderror(model) ≈ [ 0.20080246284436692, 
0.14068933863735536, @@ -245,7 +245,7 @@ end 0.010415729161988225, ] rtol = 1e-07 ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - @test_broken dof_residual(model) == 4.0 + @test dof_residual(model) == 4.0 @test stderror(model) ≈ [ 0.15474638805584298, 0.13467582259453692, From c4f79590379da0dd771382131eecef32bf3c4a1d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 19 Nov 2024 23:19:18 +0100 Subject: [PATCH 079/106] Fix docs --- docs/src/examples.md | 4 ++-- docs/src/index.md | 10 ++++------ 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index 7a7e3319..73061e3a 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -533,9 +533,9 @@ Coefficients: ──────────────────────────────────────────────────────────────────────────── (Intercept) -1.07586 0.352543 -3.05 0.0023 -1.76684 -0.384892 Height 0.0232172 0.00523331 4.44 <1e-05 0.0129601 0.0334743 -Girth 0.242837 0.00922555 26.32 <1e-99 0.224756 0.260919 +Girth 0.242837 0.00922556 26.32 <1e-99 0.224756 0.260919 ──────────────────────────────────────────────────────────────────────────── julia> round(optimal_bic.minimum, digits=5) 156.37638 -``` \ No newline at end of file +``` diff --git a/docs/src/index.md b/docs/src/index.md index 8f2c6876..b5ca3cfa 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -199,9 +199,10 @@ Coefficients: ─────────────────────────────────────────────────────────────────────────── Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% ─────────────────────────────────────────────────────────────────────────── -(Intercept) 0.51673 0.0288654 17.90 <1e-31 0.459447 0.574012 -x -0.0478667 0.0266884 -1.79 0.0760 -0.100829 0.00509556 +(Intercept) 0.51673 0.0287193 17.99 <1e-32 0.459737 0.573722 +x -0.0478667 0.0265532 -1.80 0.0745 -0.100561 0.00482739 ─────────────────────────────────────────────────────────────────────────── + ``` !!! 
warning @@ -222,9 +223,6 @@ julia> loglikelihood(m_aweights) julia> loglikelihood(m_fweights) -25.51860961756451 - -#julia> loglikelihood(m_pweights) -#-16.296307561384253 ``` ## Comparing models with F-test @@ -320,7 +318,7 @@ Note that it's currently only implemented for linear models without weights. ```jldoctest methods julia> round.(cooksdistance(mdl); digits=8) -3-element Vector{Float64}: +3×1 Matrix{Float64}: 2.5 0.25 2.5 From d2b5cb06647ad8310f39fc30ec05a8422a1b9a1b Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 25 Nov 2024 17:29:45 +0100 Subject: [PATCH 080/106] Added tests for ftest. They throw for pweights --- test/analytic_weights.jl | 12 ++++++++++++ test/probability_weights.jl | 34 +++++++++++++++++++++------------- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl index d5cd379e..8e27706e 100644 --- a/test/analytic_weights.jl +++ b/test/analytic_weights.jl @@ -7,6 +7,7 @@ w = rand(rng, 25) * 6 w = floor.(w) .+ 1 df = DataFrame(y=y, x1=x1, x2=x2, w=w) + clotting = DataFrame( u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), lot1=[118, 58, 42, 35, 27, 25, 21, 19, 18], @@ -24,6 +25,17 @@ dobson = DataFrame( w=[1, 2, 1, 2, 3, 4, 3, 2, 1] ) +@testset "Linear model ftest" begin + model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w)) + model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w)) + X = hcat(ones(length(df.y)), df.x1, df.x2) + model_2 = lm(X, y; wts=aweights(df.w)) + @test ftest(model_1).fstat ≈ 1.551275 rtol = 1e-05 + @test ftest(model_2) === ftest(model_1) + @test ftest(model_0, model_1).fstat[2] ≈ 1.7860438 rtol = 1e-05 + @test ftest(model_0, model_2).fstat[2] ≈ 1.7860438 rtol = 1e-05 +end + @testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), atol=1e-08, rtol=1e-08) diff --git a/test/probability_weights.jl b/test/probability_weights.jl index 3c1b2ab7..5a81bb8d 
100644 --- a/test/probability_weights.jl +++ b/test/probability_weights.jl @@ -28,6 +28,16 @@ dobson = DataFrame( dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) +@testset "Linear Model ftest/loglikelihod" begin + model_1 = lm(@formula(y ~ x1 + x2), df; wts=pweights(df.pweights)) + X = hcat(ones(length(df.y)), df.x1, df.x2) + model_2 = lm(X, y; wts=pweights(df.pweights)) + @test_throws ArgumentError ftest(model_1) + @test_throws ArgumentError ftest(model_2) + @test_throws ArgumentError loglikelihood(model_1) + @test_throws ArgumentError loglikelihood(model_2) +end + @testset "GLM: Binomial with LogitLink link - ProbabilityWeights" begin model = glm( @formula(y ~ 1 + x1 + x2), @@ -37,12 +47,10 @@ dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) wts = pweights(df.pweights), rtol = 1e-07, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 47.311214978934785 rtol = 1e-07 @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.5241460813701, 0.14468927249342, 2.487500063309] rtol = 1e-06 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - ## @test_broken dof_residual(model) == 47.0 - ## It has now been fixed @test dof_residual(model) == 47.0 @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 end @@ -56,10 +64,10 @@ end wts = pweights(df.pweights), rtol = 1e-09, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 47.280413566179 rtol = 1e-07 @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.379823362118, 0.17460125170132, 1.4927538978259] rtol = 1e-07 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 @test dof_residual(model) == 47.0 @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 end @@ -73,10 +81,10 @@ end wts = pweights(df.pweights), rtol = 1e-07, ) + @test_throws ArgumentError loglikelihood(model) 
@test deviance(model) ≈ 47.17915872474391 rtol = 1e-07 @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.007674579802284, -0.5378132620063, 2.994759904353] rtol = 1e-06 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 @test dof_residual(model) == 47.0 @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 end @@ -90,6 +98,7 @@ end wts = pweights(df.pweights), rtol = 1e-09, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 47.063354817529856 rtol = 1e-07 @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 @test coef(model) ≈ [-0.9897210433718, 0.449902058467, 1.5467108410611] rtol = 1e-07 @@ -108,6 +117,7 @@ end rtol = 1e-12, atol = 1e-9, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 0.012601328117859285 rtol = 1e-07 @test nulldeviance(model) ≈ 0.28335799805430917 rtol = 1e-07 @test coef(model) ≈ [5.325098274654255, -0.5495659110653159] rtol = 1e-5 @@ -125,6 +135,7 @@ end wts = pweights(quine.pweights), atol = 1e-09, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 @test coef(model) ≈ [ @@ -136,7 +147,6 @@ end 0.3562972562034377, 0.34801618219815034, ] rtol = 1e-04 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 @test dof_residual(model) == 139.0 @test_broken stderror(model) ≈ [ 0.20080246284436692, @@ -167,6 +177,7 @@ end wts = pweights(quine.pweights), rtol = 1e-09, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 @test coef(model) ≈ [ @@ -178,8 +189,6 @@ end 0.3562972562034377, 0.34801618219815034, ] rtol = 1e-04 - ## Test shouldbe broken because of https://github.com/JuliaStats/GLM.jl/issues/509 - ## but since negbinomial is correct, by mistake 
@test dof_residual(model) == 139.0 @test stderror(model) ≈ [ 0.20080246284436692, @@ -201,6 +210,7 @@ end wts = pweights(quine.pweights), rtol = 1e-08, ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 178.99970038364276 rtol = 1e-07 @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 @test coef(model) ≈ [ @@ -213,9 +223,7 @@ end 0.5840284357554048, ] rtol = 1e-07 - ## Test should be broken because of https://github.com/JuliaStats/GLM.jl/issues/509. - ## However, in the negative binomial case the test passes - @test dof_residual(model) == 139.0 + @test dof_residual(model) == 139.0 @test stderror(model) ≈ [ 0.4156607040373307, 0.30174203746555045, @@ -235,6 +243,7 @@ end LogLink(), wts = pweights(dobson.pweights), ) + @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 4.837327189925912 rtol = 1e-07 @test nulldeviance(model) ≈ 12.722836814903907 rtol = 1e-07 @test coef(model) ≈ [ @@ -243,8 +252,7 @@ end -0.19731134600684794, -0.05011966661241072, 0.010415729161988225, - ] rtol = 1e-07 - ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + ] rtol = 1e-07 @test dof_residual(model) == 4.0 @test stderror(model) ≈ [ 0.15474638805584298, From cd165d70c4bdc18b80b85c8f927752406e6ac312 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 25 Nov 2024 17:30:14 +0100 Subject: [PATCH 081/106] Make ftest throw if a model weighted by pweights is passed --- src/ftest.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/ftest.jl b/src/ftest.jl index b29f6644..9043cd53 100644 --- a/src/ftest.jl +++ b/src/ftest.jl @@ -57,7 +57,10 @@ F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07 """ function ftest(mod::LinearModel) hasintercept(mod) || throw(ArgumentError("ftest only works for models with an intercept")) - + wts = weights(mod) + if wts isa ProbabilityWeights + throw(ArgumentError("`ftest` for probability weighted models is not currently 
supported.")) + end rss = deviance(mod) tss = nulldeviance(mod) @@ -228,3 +231,7 @@ function show(io::IO, ftr::FTestResult{N}) where N end print(io, '─'^totwidth) end + +function ftest(r::LinearModel{T,<:ProbabilityWeights}) where {T} + throw(ArgumentError("`ftest` for probability weighted models is not currently supported.")) +end From 606a41957ef5ac1f9b85059b8d68afc10b36b28f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Mon, 25 Nov 2024 17:30:48 +0100 Subject: [PATCH 082/106] Fix how loglikelihood throws for pweights weighted models --- Project.toml | 2 ++ docs/Project.toml | 3 ++- src/glmfit.jl | 4 ++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 02d75dd4..9d7c27cf 100644 --- a/Project.toml +++ b/Project.toml @@ -14,6 +14,7 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" +StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] @@ -29,6 +30,7 @@ StatsAPI = "1.4" StatsBase = "0.33.5, 0.34" StatsFuns = "0.6, 0.7, 0.8, 0.9, 1.0" StatsModels = "0.7.3" +StatsPlots = "0.15.7" Tables = "1" julia = "1.6" diff --git a/docs/Project.toml b/docs/Project.toml index a6e35558..60fd6748 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -3,6 +3,7 @@ CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" Optim = "429524aa-4258-5aef-a3af-852621145aeb" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" @@ -12,4 +13,4 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] DataFrames = "1" Documenter = "1" -Optim = "1.6.2" \ No newline at end 
of file +Optim = "1.6.2" diff --git a/src/glmfit.jl b/src/glmfit.jl index a43ee079..68a504e2 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -324,9 +324,9 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, wts.sum, N) end else - @inbounds for i in eachindex(y, mu, wts) + #@inbounds for i in eachindex(y, mu, wts) throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) - end + #end end return ll end From 5d948de64e5bd6126726967bdb0072165224a3af Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 26 Nov 2024 00:13:48 +0100 Subject: [PATCH 083/106] Remove StatsPlots dependence. --- Project.toml | 2 -- 1 file changed, 2 deletions(-) diff --git a/Project.toml b/Project.toml index 7ce047fe..ddd8ca51 100644 --- a/Project.toml +++ b/Project.toml @@ -14,7 +14,6 @@ StatsAPI = "82ae8749-77ed-4fe6-ae5f-f523153014b0" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" -StatsPlots = "f3b207a7-027a-5e70-b257-86293d7955fd" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" [compat] @@ -30,7 +29,6 @@ StatsAPI = "1.4" StatsBase = "0.33.5, 0.34" StatsFuns = "0.6, 0.7, 0.8, 0.9, 1.0" StatsModels = "0.7.3" -StatsPlots = "0.15.7" Tables = "1" julia = "1.6" From 4fb18df9fb62962efaecb85ae6edd7f15c705ca8 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Tue, 26 Nov 2024 00:14:20 +0100 Subject: [PATCH 084/106] Fix weighting with :qr method. 
--- src/glmfit.jl | 7 ++-- src/linpred.jl | 63 +++++++++++++++++------------------ test/analytic_weights.jl | 66 +++++++++++++++++++++---------------- test/probability_weights.jl | 31 ++++++++++------- test/runtests.jl | 54 +++++++++++++++--------------- 5 files changed, 121 insertions(+), 100 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 80220482..8399394b 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -901,7 +901,7 @@ end function momentmatrix(m::GeneralizedLinearModel) X = modelmatrix(m; weighted=false) r, d = varstruct(m) - return mul!(m.pp.scratchm1, Diagonal(r.*d), X) + return Diagonal(r.*d)*X end function varstruct(x::GeneralizedLinearModel) @@ -917,5 +917,8 @@ end function invloglikhessian(m::GeneralizedLinearModel) r, d = varstruct(m) - return invchol(m.pp)/d + return invfact(m.pp)/d end + +invfact(f::DensePredChol) = invchol(f) +invfact(f::DensePredQR) = invqr(f) diff --git a/src/linpred.jl b/src/linpred.jl index 3df8c2de..9a978626 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -41,12 +41,16 @@ mutable struct DensePredQR{T<:BlasReal, Q<:Union{QRCompactWY, QRPivoted}, W<:Abs wts::W scratchm1::Matrix{T} - function DensePredQR(X::AbstractMatrix, pivot::Bool, wts::W) where W<:Union{AbstractWeights, AbstractVector} + function DensePredQR(X::AbstractMatrix, pivot::Bool, wts::W) where W<:Union{AbstractWeights} n, p = size(X) T = typeof(float(zero(eltype(X)))) Q = pivot ? QRPivoted : QRCompactWY fX = float(X) - cfX = fX === X ? copy(fX) : fX + if wts isa UnitWeights + cfX = fX === X ? copy(fX) : fX + else + cfX = Diagonal(sqrt.(wts))*fX + end F = pivot ? pivoted_qr!(cfX) : qr!(cfX) new{T,Q,W}(Matrix{T}(X), zeros(T, p), @@ -67,19 +71,18 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res function delbeta! 
end function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}) where T<:BlasReal - rnk = rank(p.qr.R) - rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) - p.delbeta = p.qr\r - mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) + r̃ = p.wts isa UnitWeights ? r : (wtsqrt = sqrt.(p.wts); wtsqrt .*= r; wtsqrt) + #rnk = rank(p.qr.R) + #rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) + p.delbeta = p.qr\r̃ + #mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) return p end -function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}, wt::AbstractVector{T}) where T<:BlasReal - rnk = rank(p.qr.R) - rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) +function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}, wt::AbstractVector) where T<:BlasReal X = p.X - W = Diagonal(wt) - sqrtW = Diagonal(sqrt.(wt)) + wtsqrt = sqrt.(wt) + sqrtW = Diagonal(wtsqrt) mul!(p.scratchm1, sqrtW, X) ỹ = (wtsqrt .*= r) # to reuse wtsqrt's memory p.qr = qr!(p.scratchm1) @@ -88,16 +91,15 @@ function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}, wt::AbstractVe end function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal + r̃ = p.wts isa UnitWeights ? 
r : (wtsqrt = sqrt.(p.wts); wtsqrt .*= r; wtsqrt) rnk = rank(p.qr.R) if rnk == length(p.delbeta) - p.delbeta = p.qr \ r + p.delbeta = p.qr \ r̃ else - #R = @view p.qr.R[:, 1:rnk] R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) - Q = @view p.qr.Q[:, 1:size(R, 1)] piv = p.qr.p fill!(p.delbeta, 0) - p.delbeta[1:rnk] = R \ Q'r + p.delbeta[1:rnk] = R \ view(p.qr.Q'*r̃, 1:rnk) invpermute!(p.delbeta, piv) end return p @@ -105,7 +107,6 @@ end function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::AbstractVector{T}) where T<:BlasReal X = p.X - W = Diagonal(wt) wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) mul!(p.scratchm1, sqrtW, X) @@ -308,12 +309,12 @@ end LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol -function invqr(p::DensePredQR{T,<: QRCompactWY}) where T +function invqr(p::DensePredQR{T,<: QRCompactWY, <:AbstractWeights}) where T Rinv = inv(p.qr.R) Rinv*Rinv' end -function invqr(p::DensePredQR{T,<: QRPivoted}) where T +function invqr(p::DensePredQR{T,<: QRPivoted, <:AbstractWeights}) where T rnk = rank(p.qr.R) k = length(p.delbeta) if rnk == k @@ -370,19 +371,20 @@ function vcov(x::LinPredModel) end end -function _vcov(pp::DensePredChol, Z::Matrix, A::Matrix) - if pp.chol isa CholeskyPivoted && rank(pp.chol) != size(A, 1) +function _vcov(pp::DensePred, Z::Matrix, A::Matrix) + if linpred_rank(pp) < size(Z, 2) nancols = [all(isnan, col) for col in eachcol(A)] nnancols = .!nancols - Zv = view(Z, :, nnancols) + idx, nidx = findall(nancols), findall(nnancols) + Zv = view(Z, :, nidx) B = Zv'Zv - Av = view(A, nnancols, nnancols) - V = similar(pp.scratchm2) - V[nnancols, nnancols] = Av * B * Av - V[nancols, :] .= NaN - V[:, nancols] .= NaN + Av = view(A, nidx, nidx) + V = similar(pp.scratchm1, (size(A)...)) + V[nidx, nidx] = Av * B * Av + V[idx, :] .= NaN + V[:, idx] .= NaN else - B = mul!(pp.scratchm2, Z', Z) + B = Z'Z V = A * B * A end return V @@ -427,7 +429,7 @@ 
modelmatrix(obj::LinPredModel; weighted::Bool=isweighted(obj)) = modelmatrix(obj function modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) Z = if weighted - mul!(pp.scratchm1, Diagonal(sqrt.(pp.wts)), pp.X) + Diagonal(sqrt.(pp.wts))*pp.X else pp.X end @@ -463,12 +465,9 @@ end function leverage(pp::DensePredQR{T, C, W}) where {T, C<:Cholesky, W} X = modelmatrix(pp; weighted=isweighted(pp)) - sum(x -> x^2, X/pp.chol.R, dims=2) + sum(x -> x^2, X/pp.qr.R, dims=2) end - - - response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl index 8e27706e..c43ccdb1 100644 --- a/test/analytic_weights.jl +++ b/test/analytic_weights.jl @@ -25,20 +25,23 @@ dobson = DataFrame( w=[1, 2, 1, 2, 3, 4, 3, 2, 1] ) -@testset "Linear model ftest" begin - model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w)) - model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w)) +for dmethod ∈ (:cholesky, :qr) +@testset "Linear model ftest with $dmethod method" for dmethod ∈ (:cholesky, :qr) + model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w), method=dmethod) + model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w), method=dmethod) X = hcat(ones(length(df.y)), df.x1, df.x2) - model_2 = lm(X, y; wts=aweights(df.w)) + model_2 = lm(X, y; wts=aweights(df.w), method=dmethod) @test ftest(model_1).fstat ≈ 1.551275 rtol = 1e-05 @test ftest(model_2) === ftest(model_1) @test ftest(model_0, model_1).fstat[2] ≈ 1.7860438 rtol = 1e-05 @test ftest(model_0, model_2).fstat[2] ≈ 1.7860438 rtol = 1e-05 end +end -@testset "GLM: Binomial with LogitLink link - AnalyticWeights" begin +for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with LogitLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), - atol=1e-08, rtol=1e-08) + method=dmethod, atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 39.58120350785813 rtol = 
1e-06 @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 @@ -71,10 +74,12 @@ end 1.6687848932140258e-8 3.1458514759844027e-9 1.67e-8; 0.4123258762224241 0.2630623634882926 0.0] rtol = 1e-07 end +end + -@testset "GLM: Binomial with ProbitLink link - AnalyticWeights" begin +@testset "GLM: Binomial with ProbitLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), - wts=aweights(df.w), rtol=1e-09) + wts=aweights(df.w), method=dmethod, rtol=1e-09) @test deviance(model) ≈ 39.595360462143866 rtol = 1e-06 @test loglikelihood(model) ≈ -19.797680231071933 rtol = 1e-06 @test coef(model) ≈ [0.42120722997197313, 1.0416447141541567, 4.916910225354065] rtol = 1e-07 @@ -108,8 +113,9 @@ end 0.7707735136764122 0.49175061259680825 0.0] rtol = 1e-07 end -@testset "GLM: Binomial with CauchitLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=aweights(df.w), rtol=1e-08, atol=1e-08) +@testset "GLM: Binomial with CauchitLink link - AnalyticWeights - method $dmethod" for dmethod ∈ (:cholesky, :qr) + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=aweights(df.w), + method=dmethod, rtol=1e-08, atol=1e-08) @test deviance(model) ≈ 39.627559015619845 rtol = 1e-07 @test loglikelihood(model) ≈ -19.813779507809922 rtol = 1e-07 @test aic(model) ≈ 45.627559015619845 rtol = 1e-07 @@ -141,8 +147,9 @@ end 0.21554272008110664 0.1375154474822352 0.0] rtol = 1e-07 end -@testset "GLM: Binomial with CloglogLink link - AnalyticWeights" begin - model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts=aweights(df.w), rtol=5e-10, atol=1e-10) +@testset "GLM: Binomial with CloglogLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), 
CloglogLink(), wts=aweights(df.w), + method=dmethod, rtol=5e-10, atol=1e-10) @test deviance(model) ≈ 39.61484762863061 rtol = 1e-07 @test loglikelihood(model) ≈ -19.807423814315307 rtol = 1e-07 # @test coef(model) ≈ [0.12095167614339054, 0.8666201161364425, 2.5534670172943965] rtol=1e-07 @@ -176,9 +183,9 @@ end 0.9629196988432743 0.6143391585021523 0.0] rtol = 1e-05 end -@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), - wts=aweights(clotting.w), atol=1e-07, rtol=1e-08) + wts=aweights(clotting.w), method=dmethod, atol=1e-07, rtol=1e-08) @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @@ -196,9 +203,9 @@ end 262.77277766267576 1210.113361381432] rtol = 1e-07 end -@testset "GLM: Gamma with IdentityLink link - AnalyticWeights" begin +@testset "GLM: Gamma with IdentityLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), - wts=aweights(clotting.w), rtol=1e-10, atol=1e-10, minstepfac=0.00001) + wts=aweights(clotting.w), method=dmethod, rtol=1e-10, atol=1e-10, minstepfac=0.00001) @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 @@ -216,9 +223,9 @@ end 0.6561290267416002 3.0215858321118008] rtol = 1e-04 end -@testset "GLM: Gamma with LogLink link - AnalyticWeights" begin +@testset "GLM: Gamma with LogLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), - wts=aweights(clotting.w), atol=1e-09, 
rtol=1e-09) + wts=aweights(clotting.w), method=dmethod, atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.41206342934199663 rtol = 1e-07 @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 @@ -236,9 +243,9 @@ end 16.590486289982852 76.40201283367323] rtol = 1e-07 end -@testset "GLM: Gamma with InverseLink link - AnalyticWeights" begin +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), - wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + wts=aweights(clotting.w), method=dmethod, atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @@ -256,9 +263,9 @@ end 262.77277766267576 1210.113361381432] rtol = 1e-07 end -@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights" begin +@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), - wts=aweights(clotting.w), atol=1e-09, rtol=1e-09) + wts=aweights(clotting.w), method=dmethod, atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.021377370485120707 rtol = 1e-07 @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 @@ -276,9 +283,10 @@ end 8424.676595366931 38797.069483575455] rtol = 1e-06 end -@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights" begin +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights - method: dmethod" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), - LogLink(), 
wts=aweights(quine.aweights), atol=1e-08, rtol=1e-08) + LogLink(), wts=aweights(quine.aweights), method=dmethod, + atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 @@ -441,9 +449,10 @@ end ] rtol = 1e-04 end -@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights" begin +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, - NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-08) + NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), + method=dmethod, rtol=1e-08, atol=1e-08) @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, @@ -604,9 +613,10 @@ end ] rtol = 1e-04 end -@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights" begin +@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), - SqrtLink(), wts=aweights(quine.aweights), rtol=1e-08, atol=1e-09) + SqrtLink(), wts=aweights(quine.aweights), method=dmethod, + rtol=1e-08, atol=1e-09) @test deviance(model) ≈ 626.6464732988984 rtol = 1e-07 @test loglikelihood(model) ≈ -2005.5355831034462 rtol = 1e-07 @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, diff --git a/test/probability_weights.jl b/test/probability_weights.jl index 5a81bb8d..81f07970 100644 --- a/test/probability_weights.jl +++ b/test/probability_weights.jl @@ -28,8 +28,8 @@ dobson = DataFrame( dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) -@testset "Linear Model ftest/loglikelihod" begin - model_1 = lm(@formula(y ~ x1 + x2), df; 
wts=pweights(df.pweights)) +@testset "Linear Model ftest/loglikelihod with $dmethod method" for dmethod ∈ (:cholesky, :qr) + model_1 = lm(@formula(y ~ x1 + x2), df; wts=pweights(df.pweights), method = dmethod) X = hcat(ones(length(df.y)), df.x1, df.x2) model_2 = lm(X, y; wts=pweights(df.pweights)) @test_throws ArgumentError ftest(model_1) @@ -38,13 +38,14 @@ dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) @test_throws ArgumentError loglikelihood(model_2) end -@testset "GLM: Binomial with LogitLink link - ProbabilityWeights" begin +@testset "GLM: Binomial with LogitLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts = pweights(df.pweights), + method = dmethod, rtol = 1e-07, ) @test_throws ArgumentError loglikelihood(model) @@ -55,13 +56,14 @@ end @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 end -@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights" begin +@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), wts = pweights(df.pweights), + method = dmethod, rtol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -72,13 +74,14 @@ end @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 end -@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights" begin +@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts = pweights(df.pweights), + method = dmethod, rtol = 1e-07, ) @test_throws ArgumentError loglikelihood(model) @@ -89,13 +92,14 @@ end @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 end -@testset "GLM: Binomial with 
CloglogLink link - ProbabilityWeights" begin +@testset "GLM: Binomial with CloglogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts = pweights(df.pweights), + method = dmethod, rtol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -107,13 +111,14 @@ end @test stderror(model) ≈ [0.647026270959, 0.74668663622095, 0.49056337945919] rtol = 1e-04 end -@testset "GLM: Gamma with LogLink link - ProbabilityWeights" begin +@testset "GLM: Gamma with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), wts = pweights(clotting.pweights), + method = dmethod, rtol = 1e-12, atol = 1e-9, ) @@ -126,13 +131,14 @@ end @test stderror(model) ≈ [0.2651749940925478, 0.06706321966020713] rtol = 1e-07 end -@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights" begin +@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts = pweights(quine.pweights), + method = dmethod, atol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -168,13 +174,14 @@ end ] rtol = 1e-04 end -@testset "GLM: with LogLink link - ProbabilityWeights" begin +@testset "GLM: with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts = pweights(quine.pweights), + method = dmethod, rtol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -201,13 +208,14 @@ end ] rtol = 1e-04 end -@testset "GLM: NegaiveBinomial(2) with SqrtLink link - ProbabilityWeights" begin +@testset "GLM: NegaiveBinomial(2) with SqrtLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ 
(:cholesky, :qr) model = glm( @formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), SqrtLink(), wts = pweights(quine.pweights), + method = dmethod, rtol = 1e-08, ) @test_throws ArgumentError loglikelihood(model) @@ -235,13 +243,14 @@ end ] rtol = 1e-04 end -@testset "GLM: Poisson with LogLink link - ProbabilityWeights" begin +@testset "GLM: Poisson with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) model = glm( @formula(Counts ~ 1 + Outcome + Treatment), dobson, Poisson(), LogLink(), wts = pweights(dobson.pweights), + method = dmethod, ) @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 4.837327189925912 rtol = 1e-07 diff --git a/test/runtests.jl b/test/runtests.jl index 56679a51..1012f0a6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -525,7 +525,7 @@ end @testset "Poisson LogLink offset with weights with $dmethod" for dmethod in (:cholesky, :qr) gm7pw = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8) + wts=fweights(repeat(1:4, outer=18)), rtol=1e-8) @test GLM.cancancel(gm7pw.rr) test_show(gm7pw) @@ -624,7 +624,7 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], @testset "Aggregated Binomial LogitLink" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + wts=fweights(Array(admit_agr.count))) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -664,7 +664,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error @testset "Binomial LogitLink aggregated with $dmethod" for dmethod in (:cholesky, :qr) gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - 
wts=admit_agr2.count) + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -681,7 +681,7 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights with $dmethod" for dmethod in (:cholesky, :qr) gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -698,7 +698,7 @@ end # Weighted Poisson example (weights are totally made up) @testset "Poisson LogLink Weights" begin gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -789,7 +789,7 @@ end @testset "Weighted NegativeBinomial LogLink, θ to be estimated with Cholesky" begin halfn = round(Int, 0.5*size(quine, 1)) wts = vcat(fill(0.8, halfn), fill(1.2, size(quine, 1) - halfn)) - gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); wts=wts) + gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); wts=fweights(wts)) test_show(gm20a) @test dof(gm20a) == 8 @test isapprox(deviance(gm20a), 164.45910399188858, rtol = 1e-7) @@ -911,7 +911,7 @@ end # Poisson with categorical predictors, weights and offset nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, Poisson(), LogLink(); offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + wts=fweights(repeat(1:4, outer=18)), rtol=1e-8, dropcollinear=false) @test !hasintercept(nointglm3) @test GLM.cancancel(nointglm3.rr) test_show(nointglm3) @@ -966,7 +966,7 @@ end # Poisson with categorical predictors, weights and offset nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) 
~ 0 + Prewt + Treat), anorexia, Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + wts=fweights(repeat(1:4, outer=18)), rtol=1e-8, dropcollinear=false) @test !hasintercept(nointglm3) @test GLM.cancancel(nointglm3.rr) test_show(nointglm3) @@ -1610,14 +1610,14 @@ end lm4 = lm(view(x, :, :), view(y, :); method=dmethod) @test coef(lm1) == coef(lm2) == coef(lm3) == coef(lm4) - lm5 = lm(x, y, wts=w, method=dmethod) - lm6 = lm(x, view(y, :), method=dmethod, wts=w) - lm7 = lm(view(x, :, :), y, method=dmethod, wts=w) - lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=w) - lm9 = lm(x, y, method=dmethod, wts=view(w, :)) - lm10 = lm(x, view(y, :), method=dmethod, wts=view(w, :)) - lm11 = lm(view(x, :, :), y, method=dmethod, wts=view(w, :)) - lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=view(w, :)) + lm5 = lm(x, y, wts=fweights(w), method=dmethod) + lm6 = lm(x, view(y, :), method=dmethod, wts=fweights(w)) + lm7 = lm(view(x, :, :), y, method=dmethod, wts=fweights(w)) + lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=fweights(w)) + lm9 = lm(x, y, method=dmethod, wts=fweights(view(w, :))) + lm10 = lm(x, view(y, :), method=dmethod, wts=fweights(view(w, :))) + lm11 = lm(view(x, :, :), y, method=dmethod, wts=fweights(view(w, :))) + lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=fweights(view(w, :))) @test coef(lm5) == coef(lm6) == coef(lm7) == coef(lm8) == coef(lm9) == coef(lm10) == coef(lm11) == coef(lm12) @@ -1628,14 +1628,14 @@ end glm4 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), wts=w) - glm6 = glm(x, view(y, :), Binomial(), wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w) - glm9 = glm(x, y, Binomial(), wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :)) - glm11 = 
glm(view(x, :, :), y, Binomial(), wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :)) + glm5 = glm(x, y, Binomial(), wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w)) + glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(), wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end @@ -1772,7 +1772,7 @@ end -0.33333334610634496 -0.33333334610634496 -0.0 -0.0 -0.33333334610634496; -3.6666667654825043 -0.0 -3.6666667654825043 -0.0 -3.6666667654825043] - gm_poisw = fit(GeneralizedLinearModel, f, dobson, Poisson(), wts = dobson.Weights) + gm_poisw = fit(GeneralizedLinearModel, f, dobson, Poisson(), wts = fweights(dobson.Weights)) mm0_poisw = [-0.9624647521850039 -0.0 -0.0 -0.0 -0.0; 0.6901050904949885 0.6901050904949885 0.0 0.0 0.0; @@ -1790,7 +1790,7 @@ end f = @formula(admit ~ 1 + rank) gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(); rtol=1e-8) gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), - wts=aweights(admit_agr.count); rtol=1e-08) + wts=fweights(admit_agr.count); rtol=1e-08) mm0_bin = [-0.5 -0.0 -0.0 -0.0 -0.5 -0.5 -0.0 -0.0 @@ -1819,7 +1819,7 @@ end f = @formula(admit ~ 1 + rank) gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink()) gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), - wts=aweights(admit_agr.count), rtol=1e-8) + wts=fweights(admit_agr.count), rtol=1e-8) mm0_bin = [-0.7978846 0.0000000 0.0000000 0.0000000 -0.7978846 -0.7978846 0.0000000 0.0000000 From 56d81aed20d4cdf9f618e158f4de933ec246759d Mon Sep 17 00:00:00 
2001 From: Giuseppe Ragusa Date: Wed, 11 Dec 2024 13:12:38 +0100 Subject: [PATCH 085/106] Add filter to jldoctest string --- docs/src/examples.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index 73061e3a..ce095dad 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -510,7 +510,7 @@ julia> round(deviance(gm1), digits=5) In this example, we choose the best model from a set of λs, based on minimum BIC. -```jldoctest +```jldoctest; filter = r"(\d*)\.(\d{7})\d+" => s"\1.\2***" julia> using GLM, RDatasets, StatsBase, DataFrames, Optim julia> trees = DataFrame(dataset("datasets", "trees")); From a2357cfeca83d531f51e21a87ef410beccae0d8f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 11 Dec 2024 13:21:09 +0100 Subject: [PATCH 086/106] Fix problem with docstrings --- src/linpred.jl | 19 +++++++++---------- src/lm.jl | 12 ++++++------ 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 9a978626..cf4e856b 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -31,7 +31,6 @@ A `LinPred` type with a dense QR decomposition of `X` - `qr`: either a `QRCompactWY` or `QRPivoted` object created from `X`, with optional row weights. 
- `scratchm1`: scratch Matrix{T} of the same size as `X` """ - mutable struct DensePredQR{T<:BlasReal, Q<:Union{QRCompactWY, QRPivoted}, W<:AbstractWeights} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector @@ -454,18 +453,18 @@ function leverage(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} end function leverage(pp::DensePredQR{T, C, W}) where {T, C<:QRPivoted, W} - X = modelmatrix(pp; weighted=isweighted(pp)) - _, k = size(X) - ch = pp.qr - rnk = length(ch.p) - p = ch.p - idx = invperm(p)[1:rnk] - sum(x -> x^2, view(X, :, 1:rnk)/ch.R[1:rnk, idx], dims=2) + X = modelmatrix(pp; weighted=isweighted(pp)) + _, k = size(X) + ch = pp.qr + rnk = length(ch.p) + p = ch.p + idx = invperm(p)[1:rnk] + sum(x -> x^2, view(X, :, 1:rnk)/ch.R[1:rnk, idx], dims=2) end function leverage(pp::DensePredQR{T, C, W}) where {T, C<:Cholesky, W} - X = modelmatrix(pp; weighted=isweighted(pp)) - sum(x -> x^2, X/pp.qr.R, dims=2) + X = modelmatrix(pp; weighted=isweighted(pp)) + sum(x -> x^2, X/pp.qr.R, dims=2) end response(obj::LinPredModel) = obj.rr.y diff --git a/src/lm.jl b/src/lm.jl index ec00b72f..f1d55572 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -404,12 +404,6 @@ Compute [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation in linear model `obj`, giving an estimate of the influence of each data point. """ -## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged -function crossmodelmatrix(model::RegressionModel; weighted::Bool=false) - x = weighted ? modelmatrix(model; weighted=weighted) : modelmatrix(model) - return Symmetric(x' * x) -end - function StatsBase.cooksdistance(obj::LinearModel) u = residuals(obj; weighted=isweighted(obj)) mse = GLM.dispersion(obj,true) @@ -418,3 +412,9 @@ function StatsBase.cooksdistance(obj::LinearModel) D = @. 
u^2 * (hii / (1 - hii)^2) / (k*mse) return D end + +## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged +function crossmodelmatrix(model::RegressionModel; weighted::Bool=false) + x = weighted ? modelmatrix(model; weighted=weighted) : modelmatrix(model) + return Symmetric(x' * x) +end From 6068d2a5815a8f341a7bf35bf0d58064dc9a8c31 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 10:38:49 +0100 Subject: [PATCH 087/106] Update docs/src/index.md Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index b5ca3cfa..e99209cc 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -133,11 +133,11 @@ Both `lm` and `glm` allow weighted estimation. The three different each observation. These weights may also be referred to as reliability weights, precision weights or inverse variance weights. These are typically used when the observations being weighted are aggregate values (e.g., averages) with differing variances. -- `FrequencyWeights` describe the inverse of the sampling probability for each observation, +- `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + These weights may also be referred to as case weights or repeat weights. +- `ProbabilityWeights` represent the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups. These weights may also be referred to as sampling weights. -- `ProbabilityWeights` describe how the sample can be scaled back to the population. - Usually are the reciprocals of sampling probabilities. To indicate which kind of weights should be used, the vector of weights must be wrapped in one of the three weights types, and then passed to the `weights` keyword argument. 
From 930a8cb5b6d57b9bc80219995e7c0a0b97636cef Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:46:16 +0100 Subject: [PATCH 088/106] Remove trailing white spaces --- docs/src/examples.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/examples.md b/docs/src/examples.md index ce095dad..a2965282 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -12,8 +12,8 @@ julia> using DataFrames, GLM, StatsBase julia> data = DataFrame(X=[1,2,3], Y=[2,4,7]) 3×2 DataFrame - Row │ X Y - │ Int64 Int64 + Row │ X Y + │ Int64 Int64 ─────┼────────────── 1 │ 1 2 2 │ 2 4 From 107d17deba1ef62851ad5a5a909cfc2d23898254 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:46:45 +0100 Subject: [PATCH 089/106] Add mention of UnitWeights in the weights discussion --- docs/src/index.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index e99209cc..3730d403 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -139,6 +139,8 @@ Both `lm` and `glm` allow weighted estimation. The three different providing a correction mechanism for under- or over-sampling certain population groups. These weights may also be referred to as sampling weights. +`GLM.jl` internally uses UnitWeights for unweighted regression. When no weights are specified, the model defaults to using `UnitWeights`, effectively treating all observations as equally weighted. + To indicate which kind of weights should be used, the vector of weights must be wrapped in one of the three weights types, and then passed to the `weights` keyword argument. Short-hand functions `aweights`, `fweights`, and `pweights` can be used to construct @@ -149,7 +151,7 @@ We illustrate the API with randomly generated data. 
```jldoctest weights julia> using StableRNGs, DataFrames, GLM -julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25), ); +julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25)); julia> m = lm(@formula(y ~ x), data) LinearModel From a003b104cfbbe040a6f512c114fbef5b40b599aa Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:48:20 +0100 Subject: [PATCH 090/106] Remove trailing white spaces --- src/GLM.jl | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index c0deef1a..313d0ee8 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -13,7 +13,7 @@ module GLM using StatsAPI import StatsBase: coef, coeftable, coefnames, confint, deviance, nulldeviance, dof, dof_residual, loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, predict!, - fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², + fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights, leverage import StatsFuns: xlogy import SpecialFunctions: erfc, erfcinv, digamma, trigamma @@ -110,13 +110,18 @@ module GLM If `method=:cholesky` (the default), then the `Cholesky` decomposition method will be used. If `method=:qr`, then the `QR` decomposition method (which is more stable but slower) will be used. - - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations. - Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. - Can be length 0 to indicate no weighting (default). + - `wts::AbstractWeights`: Weights of observations. 
+ The weights can be of type `AnalyticWeights`, `FrequencyWeights`, `ProbabilityWeights`, or `UnitWeights`. + - `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for + each observation. These weights may also be referred to as reliability weights, precision + weights or inverse variance weights. These are typically used when the observations being + weighted are aggregate values (e.g., averages) with differing variances. + - `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + These weights may also be referred to as case weights or repeat weights. + - `ProbabilityWeights` represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. + These weights may also be referred to as sampling weights. + - `UnitWeights` all weights are equal to 1 (default). - `contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()`: a `Dict` mapping term names (as `Symbol`s) to term types (e.g. `ContinuousTerm`) or contrasts (e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`, From 1c06c7e5d78592bef6efe647c02a615dd3ce5296 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:49:09 +0100 Subject: [PATCH 091/106] Change delbeta! signature --- src/linpred.jl | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index cf4e856b..d282d99d 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -69,7 +69,7 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res """ function delbeta! end -function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T, <:QRCompactWY,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal r̃ = p.wts isa UnitWeights ? 
r : (wtsqrt = sqrt.(p.wts); wtsqrt .*= r; wtsqrt) #rnk = rank(p.qr.R) #rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) @@ -78,7 +78,7 @@ function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}) where T<:BlasR return p end -function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}, wt::AbstractVector) where T<:BlasReal +function delbeta!(p::DensePredQR{T, <:QRCompactWY,<:AbstractWeights}, r::Vector{T}, wt::AbstractVector) where T<:BlasReal X = p.X wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) @@ -89,7 +89,7 @@ function delbeta!(p::DensePredQR{T, <:QRCompactWY}, r::Vector{T}, wt::AbstractVe return p end -function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T,<:QRPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal r̃ = p.wts isa UnitWeights ? r : (wtsqrt = sqrt.(p.wts); wtsqrt .*= r; wtsqrt) rnk = rank(p.qr.R) if rnk == length(p.delbeta) @@ -104,7 +104,7 @@ function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal return p end -function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::AbstractVector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T,<:QRPivoted,<:AbstractWeights}, r::Vector{T}, wt::AbstractVector{T}) where T<:BlasReal X = p.X wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) @@ -187,13 +187,13 @@ function cholesky(p::DensePredChol{T}) where T<:FP Cholesky(copy(cholfactors(c)), c.uplo, c.info) end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal X = p.wts isa UnitWeights ? 
p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol X = p.wts isa UnitWeights ? p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) delbeta = mul!(p.delbeta, adjoint(X), r) @@ -219,7 +219,7 @@ function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T} p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted, <:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal piv = p.chol.p # inverse vector delbeta = p.delbeta # p.scratchm1 = WX @@ -437,7 +437,7 @@ end leverage(x::LinPredModel) = leverage(x.pp) -function leverage(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} +function leverage(pp::DensePredChol{T,<:CholeskyPivoted}) where T X = modelmatrix(pp; weighted=isweighted(pp)) _, k = size(X) ch = pp.chol @@ -447,12 +447,12 @@ function leverage(pp::DensePredChol{T, C, W}) where {T, C<:CholeskyPivoted, W} sum(x -> x^2, view(X, :, 1:rnk)/ch.U[1:rnk, idx], dims=2) end -function leverage(pp::DensePredChol{T, C, W}) where {T, C<:Cholesky, W} +function leverage(pp::DensePredChol{T,<:Cholesky}) where T X = modelmatrix(pp; weighted=isweighted(pp)) sum(x -> x^2, X/pp.chol.U, dims=2) end -function leverage(pp::DensePredQR{T, C, W}) where {T, C<:QRPivoted, W} +function leverage(pp::DensePredQR{T,<:QRPivoted}) where T X = modelmatrix(pp; weighted=isweighted(pp)) _, k = size(X) ch = pp.qr @@ -462,7 +462,7 @@ function leverage(pp::DensePredQR{T, C, W}) where {T, C<:QRPivoted, W} sum(x -> x^2, view(X, :, 1:rnk)/ch.R[1:rnk, idx], dims=2) end -function leverage(pp::DensePredQR{T, C, W}) where {T, C<:Cholesky, W} +function 
leverage(pp::DensePredQR{T,<:QRCompactWY}) where T X = modelmatrix(pp; weighted=isweighted(pp)) sum(x -> x^2, X/pp.qr.R, dims=2) end @@ -479,6 +479,13 @@ end residuals(obj::LinPredModel; weighted::Bool=false) = residuals(obj.rr; weighted=weighted) +""" + nobs(obj::LinearModel) + nobs(obj::GLM) + +For linear and generalized linear models, returns the number of rows, or, +when prior weights are specified, the sum of weights. +""" nobs(obj::LinPredModel) = nobs(obj.rr) weights(obj::RegressionModel) = weights(obj.model) @@ -492,7 +499,7 @@ isweighted(pp::LinPred) = weights(pp) isa Union{FrequencyWeights, AnalyticWeight coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) coefnames(x::LinPredModel) = - x.formula === nothing ? ["x$i" for i in 1:length(coef(x))] : StatsModels.vectorize(coefnames(formula(x).rhs)) + x.formula === nothing ? ["x$i" for i in 1:length(coef(x))] : StatsModels.vectorize(coefnames(formula(x).rhs)) dof_residual(obj::LinPredModel) = nobs(obj) - linpred_rank(obj) From b41cce71ef9bb13f40377b5b48feff65621782fc Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:50:14 +0100 Subject: [PATCH 092/106] Add tests for dropcollinear=false --- test/analytic_weights.jl | 72 ++++++++++++++++++------------------- test/probability_weights.jl | 32 +++++++++++------ 2 files changed, 58 insertions(+), 46 deletions(-) diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl index c43ccdb1..ced37571 100644 --- a/test/analytic_weights.jl +++ b/test/analytic_weights.jl @@ -25,23 +25,24 @@ dobson = DataFrame( w=[1, 2, 1, 2, 3, 4, 3, 2, 1] ) -for dmethod ∈ (:cholesky, :qr) -@testset "Linear model ftest with $dmethod method" for dmethod ∈ (:cholesky, :qr) - model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w), method=dmethod) - model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w), method=dmethod) +itr = Iterators.product((:qr, :cholesky), (true, false)) + + +@testset "Linear model ftest with $dmethod method with 
dropcollinear=$drop" for (dmethod, drop) ∈ itr + model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w), method=dmethod, dropcollinear=drop) + model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w), method=dmethod, dropcollinear=drop) X = hcat(ones(length(df.y)), df.x1, df.x2) - model_2 = lm(X, y; wts=aweights(df.w), method=dmethod) + model_2 = lm(X, y; wts=aweights(df.w), method=dmethod, dropcollinear=drop) @test ftest(model_1).fstat ≈ 1.551275 rtol = 1e-05 @test ftest(model_2) === ftest(model_1) @test ftest(model_0, model_1).fstat[2] ≈ 1.7860438 rtol = 1e-05 @test ftest(model_0, model_2).fstat[2] ≈ 1.7860438 rtol = 1e-05 end -end -for dmethod ∈ (:cholesky, :qr) -@testset "GLM: Binomial with LogitLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) + +@testset "GLM: Binomial with LogitLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), - method=dmethod, atol=1e-08, rtol=1e-08) + method=dmethod, dropcollinear=drop, atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 39.58120350785813 rtol = 1e-06 @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 @@ -74,12 +75,12 @@ for dmethod ∈ (:cholesky, :qr) 1.6687848932140258e-8 3.1458514759844027e-9 1.67e-8; 0.4123258762224241 0.2630623634882926 0.0] rtol = 1e-07 end -end -@testset "GLM: Binomial with ProbitLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) + +@testset "GLM: Binomial with ProbitLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), - wts=aweights(df.w), method=dmethod, rtol=1e-09) + wts=aweights(df.w), method=dmethod, dropcollinear=drop, rtol=1e-09) @test deviance(model) ≈ 39.595360462143866 rtol = 1e-06 @test loglikelihood(model) ≈ -19.797680231071933 
rtol = 1e-06 @test coef(model) ≈ [0.42120722997197313, 1.0416447141541567, 4.916910225354065] rtol = 1e-07 @@ -113,9 +114,9 @@ end 0.7707735136764122 0.49175061259680825 0.0] rtol = 1e-07 end -@testset "GLM: Binomial with CauchitLink link - AnalyticWeights - method $dmethod" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with CauchitLink link - AnalyticWeights - method $dmethod" for (dmethod, drop) ∈ itr model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=aweights(df.w), - method=dmethod, rtol=1e-08, atol=1e-08) + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-08) @test deviance(model) ≈ 39.627559015619845 rtol = 1e-07 @test loglikelihood(model) ≈ -19.813779507809922 rtol = 1e-07 @test aic(model) ≈ 45.627559015619845 rtol = 1e-07 @@ -147,13 +148,13 @@ end 0.21554272008110664 0.1375154474822352 0.0] rtol = 1e-07 end -@testset "GLM: Binomial with CloglogLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with CloglogLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts=aweights(df.w), - method=dmethod, rtol=5e-10, atol=1e-10) + method=dmethod, dropcollinear=drop, rtol=5e-10, atol=1e-10) @test deviance(model) ≈ 39.61484762863061 rtol = 1e-07 @test loglikelihood(model) ≈ -19.807423814315307 rtol = 1e-07 - # @test coef(model) ≈ [0.12095167614339054, 0.8666201161364425, 2.5534670172943965] rtol=1e-07 - # @test stderror(model) ≈ [0.46442064138194333, 0.9661962332997427, 116.7042677626327] rtol=1e-07 + @test coef(model) ≈ [0.12095167614339054, 0.8666201161364425, 2.71457411130009] rtol=1e-07 + @test stderror(model) ≈ [0.46442064138194333, 0.9661962332997427, 462.67067410332123] rtol=1e-07 @test aic(model) ≈ 45.61484762863061 rtol = 1e-07 @test bic(model) ≈ 49.27147510323522 rtol = 1e-07 @test GLM.momentmatrix(model) ≈ [ 1.9242952153533148 0.3483465846271526 0.0; @@ -183,9 +184,9 @@ end 
0.9629196988432743 0.6143391585021523 0.0] rtol = 1e-05 end -@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), - wts=aweights(clotting.w), method=dmethod, atol=1e-07, rtol=1e-08) + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-07, rtol=1e-08) @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @@ -203,9 +204,9 @@ end 262.77277766267576 1210.113361381432] rtol = 1e-07 end -@testset "GLM: Gamma with IdentityLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Gamma with IdentityLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), - wts=aweights(clotting.w), method=dmethod, rtol=1e-10, atol=1e-10, minstepfac=0.00001) + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, rtol=1e-10, atol=1e-10, minstepfac=0.00001) @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 @@ -223,9 +224,9 @@ end 0.6561290267416002 3.0215858321118008] rtol = 1e-04 end -@testset "GLM: Gamma with LogLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Gamma with LogLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), - wts=aweights(clotting.w), method=dmethod, atol=1e-09, rtol=1e-09) + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, rtol=1e-09) @test 
deviance(model) ≈ 0.41206342934199663 rtol = 1e-07 @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 @@ -243,9 +244,9 @@ end 16.590486289982852 76.40201283367323] rtol = 1e-07 end -@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), - wts=aweights(clotting.w), method=dmethod, atol=1e-09, rtol=1e-09) + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 @@ -263,9 +264,9 @@ end 262.77277766267576 1210.113361381432] rtol = 1e-07 end -@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), - wts=aweights(clotting.w), method=dmethod, atol=1e-09, rtol=1e-09) + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, rtol=1e-09) @test deviance(model) ≈ 0.021377370485120707 rtol = 1e-07 @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 @@ -283,11 +284,10 @@ end 8424.676595366931 38797.069483575455] rtol = 1e-06 end -@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights - method: dmethod" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights - method: dmethod" for (dmethod, drop) 
∈ itr model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), method=dmethod, - atol=1e-08, rtol=1e-08) - + dropcollinear=drop, atol=1e-08, rtol=1e-08) @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, @@ -449,10 +449,10 @@ end ] rtol = 1e-04 end -@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), - method=dmethod, rtol=1e-08, atol=1e-08) + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-08) @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, @@ -613,10 +613,10 @@ end ] rtol = 1e-04 end -@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), - SqrtLink(), wts=aweights(quine.aweights), method=dmethod, - rtol=1e-08, atol=1e-09) + SqrtLink(), wts=aweights(quine.aweights), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-09) @test deviance(model) ≈ 626.6464732988984 rtol = 1e-07 @test loglikelihood(model) ≈ -2005.5355831034462 rtol = 1e-07 @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, diff --git a/test/probability_weights.jl b/test/probability_weights.jl index 81f07970..ed21f416 100644 --- 
a/test/probability_weights.jl +++ b/test/probability_weights.jl @@ -28,7 +28,10 @@ dobson = DataFrame( dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) -@testset "Linear Model ftest/loglikelihod with $dmethod method" for dmethod ∈ (:cholesky, :qr) +itr = Iterators.product((:qr, :cholesky), (true, false)) + + +@testset "Linear Model ftest/loglikelihod with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model_1 = lm(@formula(y ~ x1 + x2), df; wts=pweights(df.pweights), method = dmethod) X = hcat(ones(length(df.y)), df.x1, df.x2) model_2 = lm(X, y; wts=pweights(df.pweights)) @@ -38,7 +41,7 @@ dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) @test_throws ArgumentError loglikelihood(model_2) end -@testset "GLM: Binomial with LogitLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with LogitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(y ~ 1 + x1 + x2), df, @@ -46,6 +49,7 @@ end LogitLink(), wts = pweights(df.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-07, ) @test_throws ArgumentError loglikelihood(model) @@ -56,7 +60,7 @@ end @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 end -@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(y ~ 1 + x1 + x2), df, @@ -64,6 +68,7 @@ end ProbitLink(), wts = pweights(df.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -74,7 +79,7 @@ end @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 end -@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights 
with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(y ~ 1 + x1 + x2), df, @@ -82,6 +87,7 @@ end CauchitLink(), wts = pweights(df.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-07, ) @test_throws ArgumentError loglikelihood(model) @@ -92,7 +98,7 @@ end @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 end -@testset "GLM: Binomial with CloglogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Binomial with CloglogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(y ~ 1 + x1 + x2), df, @@ -100,6 +106,7 @@ end CloglogLink(), wts = pweights(df.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -111,7 +118,7 @@ end @test stderror(model) ≈ [0.647026270959, 0.74668663622095, 0.49056337945919] rtol = 1e-04 end -@testset "GLM: Gamma with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Gamma with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(lot1 ~ 1 + u), clotting, @@ -119,6 +126,7 @@ end LogLink(), wts = pweights(clotting.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-12, atol = 1e-9, ) @@ -131,7 +139,7 @@ end @test stderror(model) ≈ [0.2651749940925478, 0.06706321966020713] rtol = 1e-07 end -@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(Days ~ Eth + Sex + Age + Lrn), 
quine, @@ -139,6 +147,7 @@ end LogLink(), wts = pweights(quine.pweights), method = dmethod, + dropcollinear = drop, atol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -174,7 +183,7 @@ end ] rtol = 1e-04 end -@testset "GLM: with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(Days ~ Eth + Sex + Age + Lrn), quine, @@ -182,6 +191,7 @@ end LogLink(), wts = pweights(quine.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-09, ) @test_throws ArgumentError loglikelihood(model) @@ -208,7 +218,7 @@ end ] rtol = 1e-04 end -@testset "GLM: NegaiveBinomial(2) with SqrtLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: NegaiveBinomial(2) with SqrtLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(Days ~ Eth + Sex + Age + Lrn), quine, @@ -216,6 +226,7 @@ end SqrtLink(), wts = pweights(quine.pweights), method = dmethod, + dropcollinear = drop, rtol = 1e-08, ) @test_throws ArgumentError loglikelihood(model) @@ -243,7 +254,7 @@ end ] rtol = 1e-04 end -@testset "GLM: Poisson with LogLink link - ProbabilityWeights with $dmethod method" for dmethod ∈ (:cholesky, :qr) +@testset "GLM: Poisson with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr model = glm( @formula(Counts ~ 1 + Outcome + Treatment), dobson, @@ -251,6 +262,7 @@ end LogLink(), wts = pweights(dobson.pweights), method = dmethod, + dropcollinear = drop, ) @test_throws ArgumentError loglikelihood(model) @test deviance(model) ≈ 4.837327189925912 rtol = 1e-07 From 2730277ec7f31635788afec1c688b57feada2d6f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:50:32 +0100 Subject: [PATCH 093/106] Minor cosmethic changes 
--- src/glmfit.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index 8399394b..a6009469 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -320,7 +320,6 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} end elseif wts isa AnalyticWeights @inbounds for i in eachindex(y, mu, wts) - #ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, wts.sum, N) end else @@ -888,7 +887,7 @@ function residuals(r::GlmResp; weighted::Bool=false) @inbounds for i in eachindex(y, μ) μi = μ[i] yi = y[i] - dres[i] = sqrt(max(0, devresid(r.d, yi, μi)))*sign(yi-μi) + dres[i] = sqrt(max(0, devresid(r.d, yi, μi))) * sign(yi-μi) end if weighted From cdeb1a338d7c1a3382d1d693c6fa26c74c7e8db2 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Thu, 12 Dec 2024 13:54:55 +0100 Subject: [PATCH 094/106] Add weighting information in COMMON_FIT_KWARGS_DOCS --- src/GLM.jl | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/GLM.jl b/src/GLM.jl index 313d0ee8..c22de932 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -111,17 +111,14 @@ module GLM If `method=:qr`, then the `QR` decomposition method (which is more stable but slower) will be used. - `wts::AbstractWeights`: Weights of observations. - The weights can be of type `AnalyticWeights`, `FrequencyWeights`, `ProbabilityWeights`, or `UnitWeights`. - - `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for - each observation. These weights may also be referred to as reliability weights, precision - weights or inverse variance weights. These are typically used when the observations being - weighted are aggregate values (e.g., averages) with differing variances. - - `FrequencyWeights` describe the number of times (or frequency) each observation was seen. - These weights may also be referred to as case weights or repeat weights. 
- - `ProbabilityWeights` represent the inverse of the sampling probability for each observation, - providing a correction mechanism for under- or over-sampling certain population groups. - These weights may also be referred to as sampling weights. - - `UnitWeights` all weights are equal to 1 (default). + The weights can be of type `AnalyticWeights`, `FrequencyWeights`, + `ProbabilityWeights`, or `UnitWeights`. `AnalyticWeights` describe a non-random + relative importance (usually between 0 and 1) for each observation. These weights may + also be referred to as reliability weights, precision weights or inverse variance weights. + `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + `ProbabilityWeights` represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. `UnitWeights` + (default) describe the case in which all weights are equal to 1 (so no weighting takes place). - `contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()`: a `Dict` mapping term names (as `Symbol`s) to term types (e.g. 
`ContinuousTerm`) or contrasts (e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`, From 95d506e1a1a4145ea24fc75bf17fe83ec5e66033 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 14:00:59 +0100 Subject: [PATCH 095/106] Add test for leverage --- test/runtests.jl | 59 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/test/runtests.jl b/test/runtests.jl index 1012f0a6..64e78f20 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2161,3 +2161,62 @@ end @test coef(ft) ≈ [9.648767705301294, -0.11274823562143056, 0.1907889126252095, -0.8123086879222496] @test_throws DomainError glm(@formula(Column1 ~ Column2 + Column3 + Column4), df, Gamma(), LogLink(), start = fill(NaN, 4)) end + +@testset "Testing various weighting specific weighting" begin + # Test probability weighting with collinear columns + form.CarbC = form.Carb + form.awts = [0.6, 0.3, 0.6, 0.3, 0.6, 0.3] + form.fwts = [6, 3, 6, 3, 6, 3] + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = aweights(form.awts), method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = aweights(form.awts), method=:qr) + @test coef(lm0) == coef(lm1)[1:2] + @test stderror(lm0) == stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = pweights(form.awts), method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = pweights(form.awts), method=:qr) + @test coef(lm0) == coef(lm1)[1:2] + @test stderror(lm0) == stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = fweights(form.fwts), method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = fweights(form.fwts), method=:qr) + @test coef(lm0) == coef(lm1)[1:2] + @test stderror(lm0) == stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + ## Leverage with weights +end + +rng = StableRNG(123) +df = DataFrame(x_1 = 
randn(rng, 10), x_2 = randn(rng, 10), y = randn(rng, 10), ) +df.xx_1 = df.x_1 +df.xx_2 = df.x_2 +df.d = rand(rng, 0:1, 10) +frm0 = @formula(y ~ x_1 + x_2) +frm1 = @formula(y ~ x_1 + xx_2 + + x_2 + xx_1) +frmp0 = @formula(d ~ x_1 + x_2) +frmp1 = @formula(d ~ x_1 + xx_2 + + x_2 + xx_1) + +lev0 = [0.2346366962678214; 0.6633984457928059; 0.32460236947851806; + 0.1543698142163501; 0.3762092067703499; 0.4887705577596249; + 0.15170408132550545; 0.15279492673405848; 0.16851296355750492; + 0.28500093809746074] + +lev0_pr = [0.28923503046426724; 0.0006968399611682526; + 0.6040870179390236; 0.222176173808432; + 0.06247295465277078; 0.8226912702704338; + 0.21412449742521156; 0.2160509316358758; + 0.23269115629631995; 0.33577412754649705] + +@testset "Leverage" for method ∈ (:qr, :cholesky) begin + lm0 = fit(LinearModel, frm0, df, method=method) + lm1 = fit(LinearModel, frm1, df, method=method) + @test leverage(lm0) ≈ leverage(lm1) + @test lev0 ≈ leverage(lm1) + glm1 = fit(GeneralizedLinearModel, frm1, df, Normal(), IdentityLink(), method=method) + @test lev0 ≈ leverage(glm1) + probit0 = glm(frmp0, df, Binomial(), ProbitLink(), method=method) + probit = glm(frmp1, df, Binomial(), ProbitLink(), method=method) + + @test leverage(probit) ≈ leverage(probit0) + @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 +end +end From f124589db242ecf524aa30bd09d8e8b3f34b442c Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 14:01:24 +0100 Subject: [PATCH 096/106] [wip] work on leverage --- src/linpred.jl | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index d282d99d..85c5366f 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -435,36 +435,35 @@ function modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) return Z end -leverage(x::LinPredModel) = leverage(x.pp) -function leverage(pp::DensePredChol{T,<:CholeskyPivoted}) where T + +leverage(x::LinPredModel) = dropdims(leverage(x.pp, 
hasfield(typeof(x.rr), :wrkwt) ? x.rr.wrkwt : 1.0); dims = 2) + +function leverage(pp::DensePredChol{T,<:CholeskyPivoted}, w) where T X = modelmatrix(pp; weighted=isweighted(pp)) _, k = size(X) ch = pp.chol rnk = rank(ch) - p = ch.p - idx = invperm(p)[1:rnk] - sum(x -> x^2, view(X, :, 1:rnk)/ch.U[1:rnk, idx], dims=2) + p = invperm(ch.p)[1:rnk] + sum(x -> x^2, (sqrt.(w).*view(X, :, p))/ch.U[1:rnk, 1:rnk], dims=2) end -function leverage(pp::DensePredChol{T,<:Cholesky}) where T +function leverage(pp::DensePredChol{T,<:Cholesky}, w) where T X = modelmatrix(pp; weighted=isweighted(pp)) - sum(x -> x^2, X/pp.chol.U, dims=2) + sum(x -> x^2, (sqrt.(w).*X)/pp.chol.U, dims=2) end -function leverage(pp::DensePredQR{T,<:QRPivoted}) where T +function leverage(pp::DensePredQR{T,<:QRPivoted}, w) where T X = modelmatrix(pp; weighted=isweighted(pp)) - _, k = size(X) ch = pp.qr - rnk = length(ch.p) - p = ch.p - idx = invperm(p)[1:rnk] - sum(x -> x^2, view(X, :, 1:rnk)/ch.R[1:rnk, idx], dims=2) + rnk = rank(ch.R) + p = ch.p[1:rnk] + sum(x -> x^2, (sqrt.(w).*view(X, :, p))/ch.R[1:rnk, 1:rnk], dims=2) end -function leverage(pp::DensePredQR{T,<:QRCompactWY}) where T +function leverage(pp::DensePredQR{T,<:QRCompactWY}, w) where T X = modelmatrix(pp; weighted=isweighted(pp)) - sum(x -> x^2, X/pp.qr.R, dims=2) + sum(x -> x^2, (sqrt.(w).*X)/pp.qr.R, dims=2) end response(obj::LinPredModel) = obj.rr.y From cbdadbc2c480ef96eb0e6b1132ee835019b68d52 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 14:01:34 +0100 Subject: [PATCH 097/106] Use inverse --- docs/Project.toml | 4 ++++ src/lm.jl | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/Project.toml b/docs/Project.toml index 60fd6748..36455e8a 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,13 +1,17 @@ [deps] +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = 
"31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +RCall = "6f49c342-dc21-5d91-9882-a32aef131414" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] diff --git a/src/lm.jl b/src/lm.jl index f1d55572..0dd86cdf 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -388,7 +388,7 @@ function momentmatrix(m::LinearModel) end end -invloglikhessian(m::LinearModel) = invchol(m.pp) +invloglikhessian(m::LinearModel) = inverse(m.pp) function varstruct(x::LinearModel) wrkwt = working_weights(x) From 2386ab9ad6d400ac99c1b21ae2538fc969430b45 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 20:11:04 +0100 Subject: [PATCH 098/106] Test leverage --- src/glmtools.jl | 16 ++++++++++++++++ src/linpred.jl | 43 +++++++++++++++++++++++-------------------- test/runtests.jl | 35 ++++++++++++++++++++++++++++++----- 3 files changed, 69 insertions(+), 25 deletions(-) diff --git a/src/glmtools.jl b/src/glmtools.jl index 03a608e6..0118d456 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -555,3 +555,19 @@ loglik_apweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) = -(wt*(1 + log loglik_apweights_obs(::Normal, y, μ, wt, ϕ, sumwt, n) = ((-log(2π*ϕ/n) - 1) + log(wt))/2 loglik_apweights_obs(::Poisson, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Poisson(μ), y) loglik_apweights_obs(d::NegativeBinomial, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) + +## Studentized Pearson residuals +function pearson_residuals(m::GeneralizedLinearModel) + r = m.rr + wts = r.wts + y, η, μ = r.y, r.eta, r.mu + h = leverage(m) + sqrt(dispersion(m)).*((y .- μ).*sqrt.(wts)) ./ 
sqrt.(dispersion(m).*glmvar.(r.d, μ)) +end + +function ccooksdistance(m::GeneralizedLinearModel) + h = leverage(m) + hh = h./(1.0.-h).^2 + Rp = pearson_residuals(m) + (Rp.^2).*hh./(dispersion(m)^2*dof(m)) +end diff --git a/src/linpred.jl b/src/linpred.jl index 85c5366f..b2257d9d 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -435,35 +435,38 @@ function modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) return Z end +function leverage(x::LinPredModel) + h = leverage(x.pp) + #return h + hasfield(typeof(x.rr), :wrkwt) ? x.rr.wrkwt.*h : x.rr.wts.*h +end - -leverage(x::LinPredModel) = dropdims(leverage(x.pp, hasfield(typeof(x.rr), :wrkwt) ? x.rr.wrkwt : 1.0); dims = 2) - -function leverage(pp::DensePredChol{T,<:CholeskyPivoted}, w) where T - X = modelmatrix(pp; weighted=isweighted(pp)) - _, k = size(X) - ch = pp.chol - rnk = rank(ch) - p = invperm(ch.p)[1:rnk] - sum(x -> x^2, (sqrt.(w).*view(X, :, p))/ch.U[1:rnk, 1:rnk], dims=2) +function leverage(pp::DensePredChol{T,<:CholeskyPivoted}) where T + X = modelmatrix(pp; weighted=false) + rnk = rank(pp.chol) + A = GLM.inverse(pp) + p = pp.chol.p[1:rnk] + diag(X[:,p]*A[p,p]*X[:,p]') + # sum(x->x^2, view(X, :, p)/view(pp.chol.U, p, p), dims=2) end -function leverage(pp::DensePredChol{T,<:Cholesky}, w) where T - X = modelmatrix(pp; weighted=isweighted(pp)) - sum(x -> x^2, (sqrt.(w).*X)/pp.chol.U, dims=2) +function leverage(pp::DensePredChol{T,<:Cholesky}) where T + X = modelmatrix(pp; weighted=false) + @show X/pp.chol.U + sum(x -> x^2, X/pp.chol.U, dims=2) end -function leverage(pp::DensePredQR{T,<:QRPivoted}, w) where T - X = modelmatrix(pp; weighted=isweighted(pp)) +function leverage(pp::DensePredQR{T,<:QRPivoted}) where T + X = modelmatrix(pp; weighted=false) ch = pp.qr rnk = rank(ch.R) - p = ch.p[1:rnk] - sum(x -> x^2, (sqrt.(w).*view(X, :, p))/ch.R[1:rnk, 1:rnk], dims=2) + p = invperm(ch.p)[1:rnk] + sum(x -> x^2, view(X, :, 1:rnk)/view(ch.R, p, p), dims=2) end -function leverage(pp::DensePredQR{T,<:QRCompactWY}, 
w) where T - X = modelmatrix(pp; weighted=isweighted(pp)) - sum(x -> x^2, (sqrt.(w).*X)/pp.qr.R, dims=2) +function leverage(pp::DensePredQR{T,<:QRCompactWY}) where T + X = modelmatrix(pp; weighted=false) + sum(x -> x^2, X/pp.qr.R, dims=2) end response(obj::LinPredModel) = obj.rr.y diff --git a/test/runtests.jl b/test/runtests.jl index 64e78f20..0cb468b3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2175,12 +2175,12 @@ end lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = pweights(form.awts), method=:qr) lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = pweights(form.awts), method=:qr) @test coef(lm0) == coef(lm1)[1:2] - @test stderror(lm0) == stderror(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] @test isnan(stderror(lm1)[3]) lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = fweights(form.fwts), method=:qr) lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = fweights(form.fwts), method=:qr) - @test coef(lm0) == coef(lm1)[1:2] - @test stderror(lm0) == stderror(lm1)[1:2] + @test coef(lm0) ≈ coef(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] @test isnan(stderror(lm1)[3]) ## Leverage with weights end @@ -2190,6 +2190,7 @@ df = DataFrame(x_1 = randn(rng, 10), x_2 = randn(rng, 10), y = randn(rng, 10), ) df.xx_1 = df.x_1 df.xx_2 = df.x_2 df.d = rand(rng, 0:1, 10) +df.w = rand(rng, 10) frm0 = @formula(y ~ x_1 + x_2) frm1 = @formula(y ~ x_1 + xx_2 + + x_2 + xx_1) frmp0 = @formula(d ~ x_1 + x_2) @@ -2206,7 +2207,7 @@ lev0_pr = [0.28923503046426724; 0.0006968399611682526; 0.21412449742521156; 0.2160509316358758; 0.23269115629631995; 0.33577412754649705] -@testset "Leverage" for method ∈ (:qr, :cholesky) begin +@testset "Leverage unweighted" for method ∈ (:qr, :cholesky) lm0 = fit(LinearModel, frm0, df, method=method) lm1 = fit(LinearModel, frm1, df, method=method) @test leverage(lm0) ≈ leverage(lm1) @@ -2215,8 +2216,32 @@ lev0_pr = [0.28923503046426724; 0.0006968399611682526; @test lev0 ≈ 
leverage(glm1) probit0 = glm(frmp0, df, Binomial(), ProbitLink(), method=method) probit = glm(frmp1, df, Binomial(), ProbitLink(), method=method) - @test leverage(probit) ≈ leverage(probit0) @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 end + + +lev0 = [0.4546669409864052, 0.39220506613766826, 0.31067464842874659, + 0.16105201633463462, 0.45458434896240396, 0.43751245519667181, + 0.12193399045053441, 0.12312180988271218, 0.22090225888834489, + 0.32334646473187784] + +lev0_pr = [0.28660353231987495, 2.6405172015486347e-05, 0.62180044570022475, + 0.25772930451725845, 0.058608663568656121, 0.78417628710278586, + 0.16615273053689983, 0.16787702318659792, 0.30440874803670093, + 0.35261685985898611] + +@testset "Leverage weighted" for method ∈ (:qr, :cholesky) + lm0 = fit(LinearModel, frm0, df, method=method, wts=df.w) + lm1 = fit(LinearModel, frm1, df, method=method, wts=df.w) + @test leverage(lm0) ≈ leverage(lm1) + @test lev0 ≈ leverage(lm1) + glm1 = fit(GeneralizedLinearModel, frm1, df, Normal(), IdentityLink(), method=method, wts=df.w) + @test lev0 ≈ leverage(glm1) + probit0 = glm(frmp0, df, Binomial(), ProbitLink(), method=method, wts=df.w) + probit = glm(frmp1, df, Binomial(), ProbitLink(), method=method, wts=df.w) + @test leverage(probit) ≈ leverage(probit0) + @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 end + + From 36326ffa57f496aeb3711d92e6180463c464ba8d Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 20:15:32 +0100 Subject: [PATCH 099/106] Comment cookdistance --- .vscode/settings.json | 2 ++ clusters_clt.tex | 24 ++++++++++++++++++++++++ row2col2.png | Bin 0 -> 9602 bytes row2col2.tex | 15 +++++++++++++++ src/glmtools.jl | 28 ++++++++++++++-------------- test.tex | 20 ++++++++++++++++++++ test/junk.jl | 30 ++++++++++++++++++++++++++++++ 7 files changed, 105 insertions(+), 14 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 clusters_clt.tex create mode 100644 row2col2.png create mode 100644 row2col2.tex create 
mode 100644 test.tex create mode 100644 test/junk.jl diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..2c63c085 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,2 @@ +{ +} diff --git a/clusters_clt.tex b/clusters_clt.tex new file mode 100644 index 00000000..bab96e09 --- /dev/null +++ b/clusters_clt.tex @@ -0,0 +1,24 @@ +\setlength{\LTpost}{0mm} +\begin{longtable}{l|rrrrrrrr} +\toprule +\multicolumn{1}{l}{} & \multicolumn{2}{c}{\emph{G} = 50} & \multicolumn{2}{c}{\emph{G} = 100} & \multicolumn{2}{c}{\emph{G} = 150} & \multicolumn{2}{c}{\emph{G} = 200} \\ +\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9} +\multicolumn{1}{l}{} & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) \\ +\midrule\addlinespace[2.5pt] +\multicolumn{9}{l}{\(\alpha=1.5, \beta=1.9\)} \\ +\midrule\addlinespace[2.5pt] +10\% & $0.03$ & $0.06$ & $0.04$ & $0.03$ & $0.03$ & $0.04$ & $0.05$ & $0.06$ \\ +5\% & $0.02$ & $0.03$ & $0.02$ & $0.01$ & $0.00$ & $0.01$ & $0.04$ & $0.06$ \\ +1\% & $0.01$ & $0.00$ & $0.01$ & $0.00$ & $0.00$ & $0.01$ & $0.01$ & $0.02$ \\ +\midrule\addlinespace[2.5pt] +\multicolumn{9}{l}{\(\alpha=1.5, \beta=2.1\)} \\ +\midrule\addlinespace[2.5pt] +10\% & $0.05$ & $0.03$ & $0.03$ & $0.08$ & $0.03$ & $0.03$ & $0.02$ & $0.08$ \\ +5\% & $0.02$ & $0.02$ & $0.01$ & $0.05$ & $0.02$ & $0.01$ & $0.02$ & $0.05$ \\ +1\% & $0.01$ & $0.00$ & $0.00$ & $0.02$ & $0.00$ & $0.00$ & $0.00$ & $0.01$ \\ +\bottomrule +\end{longtable} +\begin{minipage}{\linewidth} +The table shows the rejection rates for \(G=\{50,100,150,200\}\) and \(\alpha=1.5\) and \(\beta=1.9\) and 
\(\beta=2.1\). The Monte Carlo is based on 10,000 simulations. The simulation standard errors are: 0.009 for \(\alpha=10\%\), 0.007 for \(\alpha=5\%\), and 0.0031 for \(\alpha=1\%\).\\ +\end{minipage} + diff --git a/row2col2.png b/row2col2.png new file mode 100644 index 0000000000000000000000000000000000000000..d6fdbe01c5375271d27000bdcc79b8bfd8601361 GIT binary patch literal 9602 zcmc(lcT`htx8S4jii&^~DI!HsPQ32^lZ_*)=F4DV#^gtA;p($0Q6G9atEeIq; zIspM8N)1Q|y-x6bzxmc(>)x3=f6SV-lALpL_H*`==j`^|C-G189@AXCc@+Qv&}eF? z8Ug^666F1=%ar6#PwihX$s0=W1KkGzKrNK|*#0l_F}I_Jp)LRr%m)Cxd<_5`lD~Sn z3;_6v0RXFZ0DwX!0KoD*r^P^teBz>m_G4ARSu7NdBp+RQu3_N=0MK@yzb~W-(z27k zr1I6&Rij#_zClHG&%!H78vwZTSX1?Zap2T??z-QC)_6m8@C4<1e=s-rWe5+eJcUy7 z``Z+s^(etZY(J8JT+6+Xb2+br?ThL+8Me_QircT-zdgR9x!+zNES>&jY_@f^LHpBi zdI!(z>klu_=bTL-EBQLR? zOFsLHM&f+z(AArN54a)q^;}L>@Y$Plxj55r+>gF9ft*ZHH${(K+!sL(iy$VBJsmq4 z)bA<1+ug4cVYyk&7MW>RS{jT;AFu&?lI)dK_8s=@F4_=yU*fi&1=y)m63kzC1z1nt ze=YaqBTk;Wm4dNmq3?8c%GBTnjYK!b-hH3itt&s)RtuC2E}b`k(vfGi%yM;xTv$*4>+4D0g%*-1e6gFYsvmh#C^`etL6mmAbYd!!$z}>Z=|6?3Ti2#u z+*aZDlyXxF-zrKoex&Lq(f#J~%d?Ao|IU;55=vghu=hS>YoG%u2>}6dqY?tHfL5 zRa!lz>^0P~9dbH?Sa}$#b<=Gef{P9i=d)Ji{Z#lMhV{BXmYUQBmZUgB2}Zo8D%H7t z{Fi^zi{RVnqzBKH1eF&3*5j||N0q>&dGk6Tm`RZ*CPkc!R+( z@iV6Z<%pigGxxxd``;EZ`467GW?uIhm7M~S;F`qO|Q*gPuc zSk7mmC6LSJB-3KEtG8E2G~isd+L2tQ*oRpA&u)+h}Y@}!B_R?=5}9x z{4((5(^ja@yHbGO(v=yW4o?p&OH;KutEn{A`B4CiJzJAvT0r zolJ>^jIfLFJ9ZxV3%!n|@|vrmWg8?fUk;sfbM3B?XJ^Jb*mrz)ut)bhbo4G*2?e4@ zdZA#3TS%~ds8Hp1(}s=XsV?kn-zAT!dY6$B-D$tZ0t%aBY|F3xc-{Tju*i%cCFQS~ zPaNNPmu=*DJ$v&(H(Q1*yW`L*Wz7J1C9e}gAmT1WY`tUo0uLemhTy4S{C@3E0tRLO zcoMY;jf7H7_56~U&*u~8aM;U$**a+KEr#xmPQzVk_g3QogY|RQTh7$h$=HtOWu zrxT_0&kec7ubC@4X8ok~+yfQ(LzsWn!(QD@d_G5Ln!3O>AatA~$Ehe8$L#1?UIDph zOTNxfz~dXP8^K#zP=pID!g;bafTX`Z`%99?uJ@yx;$dtSyhc&%f*{Fh2bCr0$ZS_? 
z7hWrUjc8_Sr2!ApfX_(yVq9B_ZV<9hUZIXHbS?u3Fxk`PsZ9MoSwJhrMypJ*z6QKm z18yz(46~{i(zjE>N&JusCrDV_+G>PSwqQ8twMsg-LUbFNVM{I5(*3z+n8#Zp1ZmL z1V>LG!om_EQi2%&wwXN84znp$rnq){jt!7&V`u-zsSl7ggM&>>`*c`fXSXjA`0~IG z%+=CRBUtqJoRl%ED-34e{XG7Mm!=vDO^d98OdwqHBh8PKz$%^KjUha2*j;4Uz43c0 zd4ZpI+}wl$HDbgi$P*`Y3KnQiEY2wP?a$4VqviGi+Ke#l&Ur4Jb1wXHhSYhyDSy+4 zQ@zk;qQOsFK?2k*nOV!0-FH3zfRnjzf10EEnCc5;Gt=M0HiCzd2cre&@Ckv9!LB|2 z6ATbDuZUR^Ja!oN2wY=T<(h$$iUYDc!o1SNE0?q%d%qxp^mSlqBV*Wo<*NqVY_UPA z{9g%Zf#!|0KWvzHWSp zn9T(9H7@*w%E;Q(uGRbSWAydtqPd=FkUO%fHbf5L?Dq$n(hm+FMtb2Feo0mk7Obe?g}bhWMKI7!^Ss8pw9Io`WZ zX9VvusgPn<6vEgfAUQTsacqu4r+)oTI@iN3#p=v2n@5K!*gb9E%su_oBk)(`Qh|Yw-HN+ zDnBi=ElhDT)n3Bo2@xXw>SuWF5NdWADt5j`->t0{J`+oib?QsJVz!gXbUBALY5iRMk$Fg*J1Po3$OZk%i&U`qw z;#_OhpZ6v)Cj zM!CDHFaa`@yRkvZ>KPLiXBed#`#$!wi$7=!#Ag&)u^X7kwy+D5pgu=??=m>NUl42& z=*ht)vFnR}lr;?RckP+7zEPV24fjQ{LNg|@jHj++`Ij=UK<#{lS5!{bwoTx&pUhnv z*7mtP*4~0*WvuI{PN{c447O~RutLF{`yFv-#heZ;6k;lj%z#}M6Heb2!P?UO!Im#Y0`G=FrqKi?;ps#Y1 zvHL7TW|-NMzt>Wr7znGZ<7_;l;j;2f@+h>F80dy7Z~b)a`pv<`mrb;{Pp=n^nDIp6 z%9L*`)K#uL3f5m!sks7l7d6i|GCiL5u*kM`Jz0LdbBpsesX&=zey8eNltA|xz9}US z(^u8rgEWlz9S@2vHg_?FRdStu(9PJUY{kU|?9wY}R}7P|sq|v9UOR@A z;L*1tB-%5v7jP)Zlk%rDs$?~Ka2BMK(k5%01dM`49 zGxeTT7^<>u*j*oQJCUkh<_KIG_jgL}b2?h0n5}KR#=C%+jrumd9Fe@gHxQLlT#jvm zo59vSj@K@6x3_P12Ag@9t#TG>{}x|H*Xf~t(h#gok;xlg+rY=iu}x)Ix%xlS+A7u# z7vbVuC|OCkc(_~q6Ez9~yVZysDNN8+gk4+3l2_3xn_$4s()?T8BSbM=->Y=#tw)JLn!G7~+4{DbNtAs*x z6@645;q|O1<4}USh8U)d>yg^rU-^Ow`2*7p=UGH9VJyX)BhmFP{ipL(<{7UQ1$ib` z8};n2`q-__^F+GOG^!;y%YA}q;_12K*z@YD*KHxIf(eBFqwX-pYxFz05*qN6_fHi^ z!vgxKM>dVVC{2Y>RoM&)!5mQAe!qN?m(gFHy-o0*pFX=Eqv417KHp%s7G63jaHSKqcMR6Y`@aDd<4U}NB#I*3B5YkD{&68CH)%Oq zc-${siZ3?wd0^m1WZ$e^s1``Ode%Qr%(aq7GidSPTW*yG7l?N7GG%kwKtW{r=HPh zel=+jm!NI9#gPb-R1^FiyDM5CH*M?%%Tmr!nL?b97eL7o^8nd{z#Npu4zr2?N}%)NCsIjLM|};x+7k)ukadX-C@X$mF-R-NnUyC9%aU{#Lkc!K1n7?R>oI1@e+> zC#B6%BRA8YssC1y2Kyxy5nd@vf5wiZCw;I2($EZr^j}u3c}xh0W@dl8IavNq6bCQo zo={1J57_;zM*qO3)hnuV610=GrJQ5zqRCFG@E@Zcu8^`~c9EgZfcbfMtpv043Ncb= 
z?_+XhY{d1Y=z6balX5@W9Cy)3DXDZSCqB1v&&eV3%2^DshUC!r1gzbFLg8`IaF9E+ z8X@RS_&J@Xblto} zu81j*R;esLIS$064~JwV=Z3bX^Q%>6Dlv=BrG*7Wq^_crd!h2>1T}M)4>qMVbqhA5 zV6C0a0$X#&6l9HqsjM3nIr4ch?A-zu;bO>Dg0H5=7JYtxXe08enL|WoTUPB-uAklp zln@+1`FtNtE1R*q50+`nPgTx~_M>wz#4hPeFL5az_zUYNSk^9QN*+Nbn(m4?ArT=q zXsQU(n@@^vHoFQQZ*EL0AB6EW#%8PeQjaa+ZmnBRP1Glqd|5qQKR36uw;;OuG7FZ% z%{7nxJ_APh^PkHIQ}7}UoD_^xmvzl>7e)`RJ28e&#^A@e1%COyt4}L0fM^?2*=C2A`x8LS?d81$o* z%4h1M$lf#_-zvp^g!=%ZI`xfbgH!kVzKc;S){^PnS-B9A{&7#va3_djoS0gUg42zg zJ0`UuwG4On-~srmW;$Td9WxH_E$#jBNPeQkivkO#l_Dxn!gOoWQ5H0ycI__EkDr!X zt{QnF({z2hLd9sZD{)nG+T6}BitNyDLG0buFy(n;xQP1v#Pr~ACozXUIi2{r=TAoj zLS+IuF}_KYs_{k)I}&u&$?8K4*}sN(KxN`!b;Ear9X>=Q6e{_+mtK}@h%~~lKir$r zF1`KQfcS2dO`2I%p)i!{EhshjO{wM|hd-sap~(roR%w-M;S)`s&7KqTu?Glf7&rIc5MClstBCaGu#c{GECuTBd*xL8KH1^4XQMYMOl2PrtWYkbkE?)@ej;fi%Q6 zB=j);IoI8uHrfPO!4qbe2CmdZ*uQ$Ax*6vj`Cm6d?7sEu{~^qmc_Q@Wyz$%3p55iTzTJ zr+r@Sb#-f#dBS_g&U3D#X$r03L7;AUSlN0PE^qDd`Op+;m>m(s=7x!6Y4y5)DA~R+ zFA40P63XnR)n8vj6h;i$ynLMgc;Jxr%-+?%iHu2(vBfxTVk5;YtGFYi2dS3@xzedm ztF_N%=Hy45#t4-;rvp^aDlJM6OSuSTe7WYix$I1k5m!ruymT)6~AzNlHqzFtbdo zC)hsx*;J$Y1GwP@#vp3;uk zvRr%6yemR$YFSg-K7`z-Gk|xQk_p*T5Mu*DjyoCKVb!vrJ*iS?%}{AI z2+|i?+kewsD##_0J`1@zUX^Jo(Bi&LZXiYUQq=$-POB5jXVLDmhX8+9E`Sc80+v4R za7kbvIOVwilvpndoTajrEZPm_AQ>cFCJIAy5Owg5UOm#=%)rGi-2H!7(h zfzLv&pM@Tl5(RGw$(J)>Y#i%Rp>a-Q9-A?P40p2%<%)M;CJ zI07Jdx!kMn&h#SV+Ff_mZ;<;|9A||d%EN?jCEB}RB05w?Em&rm@I*8Pkd=rWeWVir zK!H0iemO1ZkJ1acyUUb+X0ws#!j%%DK>FaiLKR_IOU5A@@-q@(Jq{0E(EF~`!n1C@ zy)HP1%QEp-e`V8+d^zj7yGvFjvsHTKWSH=N!|C(y| z%MYCKSZhKs7TX*}2IY(bFpMMBq1eKu;dR^5=ZBXN&fohsYcm*zN6UU0^YO~qGo+YE zMY6}b=ncnfrT6m3X65qB^aWnTIXmP-r^xFkxASV~7yqPBB?j_%VOm%*(gIS>{7c;OYB8DL3jWP+Q*IIx42gUx)h)f0Ivqk59K*#o^Y8Q8 zdCsmDUmpC(YrV*EYX1|&h54?u#XiX)RO5!vVhiC+PK+e{zsE{}gHH{rXi(MfraP8J zhB4pBQ4yUbn;Mt@LSXjcwiod9g!En&w<_tmo(2lJdDc==WRz5rQthhUjV5l5`etvB zQGND|$@Sy4SyR5;mUOxl7lBl|XVG#9Q4A@@&7zQ=fQ2>fMrLhy-L+5sKJ-&Le$b)f zJyu_k1;69grCpJ7+sAvdhxxpxkG_wBZ%qkec4bcQ=8K5G!e!Fov$JrlK*8FNCoj;; 
zd0OoPRgqziZa2a0UeQ`EUM(mcbK?lpw%(yLCL$Nls$|t4Bs@i>3xm32%EF}C+kJWy zj-DG}Gw<5!-N+Olz>9l(K&rpW7KdNE5ylc)buV1RYx*HO`I`&;Hzv72I+z5)T z&#dQ$!Ov_XU`#?gwTc-NFr(4qHpXAuo2mRRUV%9+-(2c#sL6R^%TN0UIPXma2~mB{ zB4(WU-H**YXHgXj#Bc(bpBbamE})5=vW`phh!hq9lhY-n$~glL@Cw%$_om%(5|z= z!yPc`A)$F9K%KO=sWW(XpMXRVCT__{FjI3* z)fZmvYz=02W^ll1$?lFNnF9xckRPYIv?nuXN18Z_$e!hL&Z#a9&7fAjg`s;FG!o0u zS+}-2=8120BvYy~=()^Ql0{3KpZJecdxId}`@@IbYaz7N22(}vuy>#)iDlIK39e{} zZ}ipxZb_ocX;(Ya{B@9bn7agqR+~?!lSQnuYT{B4qUca9nltZIy*Wcj+Jj7Im{Y?B za&a{5UU0{ZW~^r%B;eyN6t!|}*arD@?ZvPrjt&!AmT2Z(q4}LOS z!HXHUemBJOdGjnFXYegs{s={D>oP~E-l{Zb;c|S=oG_+CrDrxy76`IKRF}z)fj9)$ zvn9ztQOHA=_sDeUW+6z)85#dgZ=K@hm6s+9=9%{=Ad`<9wX{QKKGJNt-CvbV)11z9 z`%{w-?DJtt-hi}m=C*Kz?au69|MX{r>J*ReDvWvxkW_s@<45{FRHf1aLy*}p*Fgc^ z`>j3~LtF#%E6K2wXX45!>L)bN=&wxXzZb%UFYVI6H7Z4Qr40)mi>1NIQ~Ug_HvhRx z(Y2UW$zLIIWy2{lix%sj{@hxjVRvQ`>9vKJksTRzb3SGnVz#0uSiR87sh8bI(0wqV zAecxe`)zao@}5vmwO}pUH*6F>mq>yATA5&an&G5rEJ$9#_wvX7<)p<2!4d0yz`<@EV@pJf|zOx}8 zt^uc!R{Xe<^5!S6N@bBmBM?1{o+J$_>^|T4^&bFqiQhRuO^8}B#Ev~oCE}C(*<@d% zTae*sf?9754*VGG3ZQ&E;q-b-P{-Z*2)^D z-^*>RgnN^rUrQM&poPl>BeMTAloY%(K#oAgQNove5f{<4X0~kn;r2_Q_Grkhvcu08 z)&l!Y@Hi?raw?u(y6YU!e}rrGo-nv%3jO&B4{^LQXSsRDMqN)TktcWiOHx=!Be_3C z6!CBSS-R1EfpTf5aqbd}L-rW>YDl1gvI{$Ph37ZgVX~z~HEF~7b6m%J?i~kDALL$B z7h;ROZ@`@FW*LpWge}afqR+nPtyN&NI6pRY&(Z;|WZ5kwy@=LuO=+7gYSKM1A;*W5rYNNo;@>lPmANMLGFh=M>r1UczqmrDNW1e?8R@2^S%g9t%#d~c;yjMTq;|D0>sAY+$FAVZYZ&;Q{D6B^-FF=~DLvQHF#npE;+A$4O~kbZ*}S(s zzgsYm&GK2lwpM>uo|9F@VfVf73FN1vO(yNxK@)tc$I~agD7`GwjDjc#BPXW*P-y?`f}CI6>qqt`4`&$%;>`CIPaf8Xgm zqe6hR)KV9QUXuT1#N(@G?(68~;o|CTlaLY?7Z;WgHx`psAit!<1jXbP#KcZ7Fs}WF0UlmX aE})?QdI0okxfyu?KvPXmwd$c=x^2, view(X, :, p)/view(pp.chol.U, p, p), dims=2) + From f26bc0e31cf175618fa43d0219f2411ea1f2c197 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 20:16:39 +0100 Subject: [PATCH 100/106] Committed by mistake --- row2col2.png | Bin 9602 -> 0 bytes row2col2.tex | 15 --------------- test.tex | 20 -------------------- test/junk.jl | 30 
------------------------------ 4 files changed, 65 deletions(-) delete mode 100644 row2col2.png delete mode 100644 row2col2.tex delete mode 100644 test.tex delete mode 100644 test/junk.jl diff --git a/row2col2.png b/row2col2.png deleted file mode 100644 index d6fdbe01c5375271d27000bdcc79b8bfd8601361..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9602 zcmc(lcT`htx8S4jii&^~DI!HsPQ32^lZ_*)=F4DV#^gtA;p($0Q6G9atEeIq; zIspM8N)1Q|y-x6bzxmc(>)x3=f6SV-lALpL_H*`==j`^|C-G189@AXCc@+Qv&}eF? z8Ug^666F1=%ar6#PwihX$s0=W1KkGzKrNK|*#0l_F}I_Jp)LRr%m)Cxd<_5`lD~Sn z3;_6v0RXFZ0DwX!0KoD*r^P^teBz>m_G4ARSu7NdBp+RQu3_N=0MK@yzb~W-(z27k zr1I6&Rij#_zClHG&%!H78vwZTSX1?Zap2T??z-QC)_6m8@C4<1e=s-rWe5+eJcUy7 z``Z+s^(etZY(J8JT+6+Xb2+br?ThL+8Me_QircT-zdgR9x!+zNES>&jY_@f^LHpBi zdI!(z>klu_=bTL-EBQLR? zOFsLHM&f+z(AArN54a)q^;}L>@Y$Plxj55r+>gF9ft*ZHH${(K+!sL(iy$VBJsmq4 z)bA<1+ug4cVYyk&7MW>RS{jT;AFu&?lI)dK_8s=@F4_=yU*fi&1=y)m63kzC1z1nt ze=YaqBTk;Wm4dNmq3?8c%GBTnjYK!b-hH3itt&s)RtuC2E}b`k(vfGi%yM;xTv$*4>+4D0g%*-1e6gFYsvmh#C^`etL6mmAbYd!!$z}>Z=|6?3Ti2#u z+*aZDlyXxF-zrKoex&Lq(f#J~%d?Ao|IU;55=vghu=hS>YoG%u2>}6dqY?tHfL5 zRa!lz>^0P~9dbH?Sa}$#b<=Gef{P9i=d)Ji{Z#lMhV{BXmYUQBmZUgB2}Zo8D%H7t z{Fi^zi{RVnqzBKH1eF&3*5j||N0q>&dGk6Tm`RZ*CPkc!R+( z@iV6Z<%pigGxxxd``;EZ`467GW?uIhm7M~S;F`qO|Q*gPuc zSk7mmC6LSJB-3KEtG8E2G~isd+L2tQ*oRpA&u)+h}Y@}!B_R?=5}9x z{4((5(^ja@yHbGO(v=yW4o?p&OH;KutEn{A`B4CiJzJAvT0r zolJ>^jIfLFJ9ZxV3%!n|@|vrmWg8?fUk;sfbM3B?XJ^Jb*mrz)ut)bhbo4G*2?e4@ zdZA#3TS%~ds8Hp1(}s=XsV?kn-zAT!dY6$B-D$tZ0t%aBY|F3xc-{Tju*i%cCFQS~ zPaNNPmu=*DJ$v&(H(Q1*yW`L*Wz7J1C9e}gAmT1WY`tUo0uLemhTy4S{C@3E0tRLO zcoMY;jf7H7_56~U&*u~8aM;U$**a+KEr#xmPQzVk_g3QogY|RQTh7$h$=HtOWu zrxT_0&kec7ubC@4X8ok~+yfQ(LzsWn!(QD@d_G5Ln!3O>AatA~$Ehe8$L#1?UIDph zOTNxfz~dXP8^K#zP=pID!g;bafTX`Z`%99?uJ@yx;$dtSyhc&%f*{Fh2bCr0$ZS_? 
z7hWrUjc8_Sr2!ApfX_(yVq9B_ZV<9hUZIXHbS?u3Fxk`PsZ9MoSwJhrMypJ*z6QKm z18yz(46~{i(zjE>N&JusCrDV_+G>PSwqQ8twMsg-LUbFNVM{I5(*3z+n8#Zp1ZmL z1V>LG!om_EQi2%&wwXN84znp$rnq){jt!7&V`u-zsSl7ggM&>>`*c`fXSXjA`0~IG z%+=CRBUtqJoRl%ED-34e{XG7Mm!=vDO^d98OdwqHBh8PKz$%^KjUha2*j;4Uz43c0 zd4ZpI+}wl$HDbgi$P*`Y3KnQiEY2wP?a$4VqviGi+Ke#l&Ur4Jb1wXHhSYhyDSy+4 zQ@zk;qQOsFK?2k*nOV!0-FH3zfRnjzf10EEnCc5;Gt=M0HiCzd2cre&@Ckv9!LB|2 z6ATbDuZUR^Ja!oN2wY=T<(h$$iUYDc!o1SNE0?q%d%qxp^mSlqBV*Wo<*NqVY_UPA z{9g%Zf#!|0KWvzHWSp zn9T(9H7@*w%E;Q(uGRbSWAydtqPd=FkUO%fHbf5L?Dq$n(hm+FMtb2Feo0mk7Obe?g}bhWMKI7!^Ss8pw9Io`WZ zX9VvusgPn<6vEgfAUQTsacqu4r+)oTI@iN3#p=v2n@5K!*gb9E%su_oBk)(`Qh|Yw-HN+ zDnBi=ElhDT)n3Bo2@xXw>SuWF5NdWADt5j`->t0{J`+oib?QsJVz!gXbUBALY5iRMk$Fg*J1Po3$OZk%i&U`qw z;#_OhpZ6v)Cj zM!CDHFaa`@yRkvZ>KPLiXBed#`#$!wi$7=!#Ag&)u^X7kwy+D5pgu=??=m>NUl42& z=*ht)vFnR}lr;?RckP+7zEPV24fjQ{LNg|@jHj++`Ij=UK<#{lS5!{bwoTx&pUhnv z*7mtP*4~0*WvuI{PN{c447O~RutLF{`yFv-#heZ;6k;lj%z#}M6Heb2!P?UO!Im#Y0`G=FrqKi?;ps#Y1 zvHL7TW|-NMzt>Wr7znGZ<7_;l;j;2f@+h>F80dy7Z~b)a`pv<`mrb;{Pp=n^nDIp6 z%9L*`)K#uL3f5m!sks7l7d6i|GCiL5u*kM`Jz0LdbBpsesX&=zey8eNltA|xz9}US z(^u8rgEWlz9S@2vHg_?FRdStu(9PJUY{kU|?9wY}R}7P|sq|v9UOR@A z;L*1tB-%5v7jP)Zlk%rDs$?~Ka2BMK(k5%01dM`49 zGxeTT7^<>u*j*oQJCUkh<_KIG_jgL}b2?h0n5}KR#=C%+jrumd9Fe@gHxQLlT#jvm zo59vSj@K@6x3_P12Ag@9t#TG>{}x|H*Xf~t(h#gok;xlg+rY=iu}x)Ix%xlS+A7u# z7vbVuC|OCkc(_~q6Ez9~yVZysDNN8+gk4+3l2_3xn_$4s()?T8BSbM=->Y=#tw)JLn!G7~+4{DbNtAs*x z6@645;q|O1<4}USh8U)d>yg^rU-^Ow`2*7p=UGH9VJyX)BhmFP{ipL(<{7UQ1$ib` z8};n2`q-__^F+GOG^!;y%YA}q;_12K*z@YD*KHxIf(eBFqwX-pYxFz05*qN6_fHi^ z!vgxKM>dVVC{2Y>RoM&)!5mQAe!qN?m(gFHy-o0*pFX=Eqv417KHp%s7G63jaHSKqcMR6Y`@aDd<4U}NB#I*3B5YkD{&68CH)%Oq zc-${siZ3?wd0^m1WZ$e^s1``Ode%Qr%(aq7GidSPTW*yG7l?N7GG%kwKtW{r=HPh zel=+jm!NI9#gPb-R1^FiyDM5CH*M?%%Tmr!nL?b97eL7o^8nd{z#Npu4zr2?N}%)NCsIjLM|};x+7k)ukadX-C@X$mF-R-NnUyC9%aU{#Lkc!K1n7?R>oI1@e+> zC#B6%BRA8YssC1y2Kyxy5nd@vf5wiZCw;I2($EZr^j}u3c}xh0W@dl8IavNq6bCQo zo={1J57_;zM*qO3)hnuV610=GrJQ5zqRCFG@E@Zcu8^`~c9EgZfcbfMtpv043Ncb= 
z?_+XhY{d1Y=z6balX5@W9Cy)3DXDZSCqB1v&&eV3%2^DshUC!r1gzbFLg8`IaF9E+ z8X@RS_&J@Xblto} zu81j*R;esLIS$064~JwV=Z3bX^Q%>6Dlv=BrG*7Wq^_crd!h2>1T}M)4>qMVbqhA5 zV6C0a0$X#&6l9HqsjM3nIr4ch?A-zu;bO>Dg0H5=7JYtxXe08enL|WoTUPB-uAklp zln@+1`FtNtE1R*q50+`nPgTx~_M>wz#4hPeFL5az_zUYNSk^9QN*+Nbn(m4?ArT=q zXsQU(n@@^vHoFQQZ*EL0AB6EW#%8PeQjaa+ZmnBRP1Glqd|5qQKR36uw;;OuG7FZ% z%{7nxJ_APh^PkHIQ}7}UoD_^xmvzl>7e)`RJ28e&#^A@e1%COyt4}L0fM^?2*=C2A`x8LS?d81$o* z%4h1M$lf#_-zvp^g!=%ZI`xfbgH!kVzKc;S){^PnS-B9A{&7#va3_djoS0gUg42zg zJ0`UuwG4On-~srmW;$Td9WxH_E$#jBNPeQkivkO#l_Dxn!gOoWQ5H0ycI__EkDr!X zt{QnF({z2hLd9sZD{)nG+T6}BitNyDLG0buFy(n;xQP1v#Pr~ACozXUIi2{r=TAoj zLS+IuF}_KYs_{k)I}&u&$?8K4*}sN(KxN`!b;Ear9X>=Q6e{_+mtK}@h%~~lKir$r zF1`KQfcS2dO`2I%p)i!{EhshjO{wM|hd-sap~(roR%w-M;S)`s&7KqTu?Glf7&rIc5MClstBCaGu#c{GECuTBd*xL8KH1^4XQMYMOl2PrtWYkbkE?)@ej;fi%Q6 zB=j);IoI8uHrfPO!4qbe2CmdZ*uQ$Ax*6vj`Cm6d?7sEu{~^qmc_Q@Wyz$%3p55iTzTJ zr+r@Sb#-f#dBS_g&U3D#X$r03L7;AUSlN0PE^qDd`Op+;m>m(s=7x!6Y4y5)DA~R+ zFA40P63XnR)n8vj6h;i$ynLMgc;Jxr%-+?%iHu2(vBfxTVk5;YtGFYi2dS3@xzedm ztF_N%=Hy45#t4-;rvp^aDlJM6OSuSTe7WYix$I1k5m!ruymT)6~AzNlHqzFtbdo zC)hsx*;J$Y1GwP@#vp3;uk zvRr%6yemR$YFSg-K7`z-Gk|xQk_p*T5Mu*DjyoCKVb!vrJ*iS?%}{AI z2+|i?+kewsD##_0J`1@zUX^Jo(Bi&LZXiYUQq=$-POB5jXVLDmhX8+9E`Sc80+v4R za7kbvIOVwilvpndoTajrEZPm_AQ>cFCJIAy5Owg5UOm#=%)rGi-2H!7(h zfzLv&pM@Tl5(RGw$(J)>Y#i%Rp>a-Q9-A?P40p2%<%)M;CJ zI07Jdx!kMn&h#SV+Ff_mZ;<;|9A||d%EN?jCEB}RB05w?Em&rm@I*8Pkd=rWeWVir zK!H0iemO1ZkJ1acyUUb+X0ws#!j%%DK>FaiLKR_IOU5A@@-q@(Jq{0E(EF~`!n1C@ zy)HP1%QEp-e`V8+d^zj7yGvFjvsHTKWSH=N!|C(y| z%MYCKSZhKs7TX*}2IY(bFpMMBq1eKu;dR^5=ZBXN&fohsYcm*zN6UU0^YO~qGo+YE zMY6}b=ncnfrT6m3X65qB^aWnTIXmP-r^xFkxASV~7yqPBB?j_%VOm%*(gIS>{7c;OYB8DL3jWP+Q*IIx42gUx)h)f0Ivqk59K*#o^Y8Q8 zdCsmDUmpC(YrV*EYX1|&h54?u#XiX)RO5!vVhiC+PK+e{zsE{}gHH{rXi(MfraP8J zhB4pBQ4yUbn;Mt@LSXjcwiod9g!En&w<_tmo(2lJdDc==WRz5rQthhUjV5l5`etvB zQGND|$@Sy4SyR5;mUOxl7lBl|XVG#9Q4A@@&7zQ=fQ2>fMrLhy-L+5sKJ-&Le$b)f zJyu_k1;69grCpJ7+sAvdhxxpxkG_wBZ%qkec4bcQ=8K5G!e!Fov$JrlK*8FNCoj;; 
zd0OoPRgqziZa2a0UeQ`EUM(mcbK?lpw%(yLCL$Nls$|t4Bs@i>3xm32%EF}C+kJWy zj-DG}Gw<5!-N+Olz>9l(K&rpW7KdNE5ylc)buV1RYx*HO`I`&;Hzv72I+z5)T z&#dQ$!Ov_XU`#?gwTc-NFr(4qHpXAuo2mRRUV%9+-(2c#sL6R^%TN0UIPXma2~mB{ zB4(WU-H**YXHgXj#Bc(bpBbamE})5=vW`phh!hq9lhY-n$~glL@Cw%$_om%(5|z= z!yPc`A)$F9K%KO=sWW(XpMXRVCT__{FjI3* z)fZmvYz=02W^ll1$?lFNnF9xckRPYIv?nuXN18Z_$e!hL&Z#a9&7fAjg`s;FG!o0u zS+}-2=8120BvYy~=()^Ql0{3KpZJecdxId}`@@IbYaz7N22(}vuy>#)iDlIK39e{} zZ}ipxZb_ocX;(Ya{B@9bn7agqR+~?!lSQnuYT{B4qUca9nltZIy*Wcj+Jj7Im{Y?B za&a{5UU0{ZW~^r%B;eyN6t!|}*arD@?ZvPrjt&!AmT2Z(q4}LOS z!HXHUemBJOdGjnFXYegs{s={D>oP~E-l{Zb;c|S=oG_+CrDrxy76`IKRF}z)fj9)$ zvn9ztQOHA=_sDeUW+6z)85#dgZ=K@hm6s+9=9%{=Ad`<9wX{QKKGJNt-CvbV)11z9 z`%{w-?DJtt-hi}m=C*Kz?au69|MX{r>J*ReDvWvxkW_s@<45{FRHf1aLy*}p*Fgc^ z`>j3~LtF#%E6K2wXX45!>L)bN=&wxXzZb%UFYVI6H7Z4Qr40)mi>1NIQ~Ug_HvhRx z(Y2UW$zLIIWy2{lix%sj{@hxjVRvQ`>9vKJksTRzb3SGnVz#0uSiR87sh8bI(0wqV zAecxe`)zao@}5vmwO}pUH*6F>mq>yATA5&an&G5rEJ$9#_wvX7<)p<2!4d0yz`<@EV@pJf|zOx}8 zt^uc!R{Xe<^5!S6N@bBmBM?1{o+J$_>^|T4^&bFqiQhRuO^8}B#Ev~oCE}C(*<@d% zTae*sf?9754*VGG3ZQ&E;q-b-P{-Z*2)^D z-^*>RgnN^rUrQM&poPl>BeMTAloY%(K#oAgQNove5f{<4X0~kn;r2_Q_Grkhvcu08 z)&l!Y@Hi?raw?u(y6YU!e}rrGo-nv%3jO&B4{^LQXSsRDMqN)TktcWiOHx=!Be_3C z6!CBSS-R1EfpTf5aqbd}L-rW>YDl1gvI{$Ph37ZgVX~z~HEF~7b6m%J?i~kDALL$B z7h;ROZ@`@FW*LpWge}afqR+nPtyN&NI6pRY&(Z;|WZ5kwy@=LuO=+7gYSKM1A;*W5rYNNo;@>lPmANMLGFh=M>r1UczqmrDNW1e?8R@2^S%g9t%#d~c;yjMTq;|D0>sAY+$FAVZYZ&;Q{D6B^-FF=~DLvQHF#npE;+A$4O~kbZ*}S(s zzgsYm&GK2lwpM>uo|9F@VfVf73FN1vO(yNxK@)tc$I~agD7`GwjDjc#BPXW*P-y?`f}CI6>qqt`4`&$%;>`CIPaf8Xgm zqe6hR)KV9QUXuT1#N(@G?(68~;o|CTlaLY?7Z;WgHx`psAit!<1jXbP#KcZ7Fs}WF0UlmX aE})?QdI0okxfyu?KvPXmwd$c=x^2, view(X, :, p)/view(pp.chol.U, p, p), dims=2) - From 2bc2138106cf068f2c4fc89413010856fb90bcd4 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 20:23:20 +0100 Subject: [PATCH 101/106] leverage returns a vec --- src/linpred.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/linpred.jl b/src/linpred.jl index 
b2257d9d..981b6850 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -436,8 +436,7 @@ function modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) end function leverage(x::LinPredModel) - h = leverage(x.pp) - #return h + h = vec(leverage(x.pp)) hasfield(typeof(x.rr), :wrkwt) ? x.rr.wrkwt.*h : x.rr.wts.*h end From 05696002902ccf4eb9095a29af5a445872093004 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Fri, 13 Dec 2024 20:38:50 +0100 Subject: [PATCH 102/106] Fix cookdistance return type --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 3730d403..97aaf0de 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -320,7 +320,7 @@ Note that it's currently only implemented for linear models without weights. ```jldoctest methods julia> round.(cooksdistance(mdl); digits=8) -3×1 Matrix{Float64}: +3-element Vector{Float64}: 2.5 0.25 2.5 From dd1b4a80f1de2fdf3aa8931f97a3dcf6b5280810 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 18 Dec 2024 12:51:29 +0100 Subject: [PATCH 103/106] Update docs/src/index.md Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/index.md b/docs/src/index.md index 97aaf0de..1868df97 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -138,8 +138,8 @@ Both `lm` and `glm` allow weighted estimation. The three different - `ProbabilityWeights` represent the inverse of the sampling probability for each observation, providing a correction mechanism for under- or over-sampling certain population groups. These weights may also be referred to as sampling weights. - -`GLM.jl` internally uses UnitWeights for unweighted regression. When no weights are specified, the model defaults to using `UnitWeights`, effectively treating all observations as equally weighted. 
+- `UnitWeights` attribute a weight of 1 to each observation, which corresponds + to unweighted regression (the default). To indicate which kind of weights should be used, the vector of weights must be wrapped in one of the three weights types, and then passed to the `weights` keyword argument. From 1c5953d1ab932358b51c21ca875091709eee7f40 Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 18 Dec 2024 12:51:46 +0100 Subject: [PATCH 104/106] Update docs/src/index.md Co-authored-by: Milan Bouchet-Valat --- docs/src/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/src/index.md b/docs/src/index.md index 1868df97..b111b1f2 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -125,7 +125,7 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ## Weighting -Both `lm` and `glm` allow weighted estimation. The three different +Both `lm` and `glm` allow weighted estimation. The four different [types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in [StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: From cd39578a7d34afac6acceb7cccf472e03501006f Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 18 Dec 2024 12:53:26 +0100 Subject: [PATCH 105/106] Update src/glmfit.jl Co-authored-by: Milan Bouchet-Valat --- src/glmfit.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/glmfit.jl b/src/glmfit.jl index a6009469..358adc68 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -323,9 +323,7 @@ function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, wts.sum, N) end else - #@inbounds for i in eachindex(y, mu, wts) - throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) - #end + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) end return ll end From 
574ec6901c2f45314139dbdbe94ca3b19cf2835b Mon Sep 17 00:00:00 2001 From: Giuseppe Ragusa Date: Wed, 18 Dec 2024 12:54:33 +0100 Subject: [PATCH 106/106] Update src/linpred.jl Co-authored-by: Milan Bouchet-Valat --- src/linpred.jl | 1 - 1 file changed, 1 deletion(-) diff --git a/src/linpred.jl b/src/linpred.jl index 981b6850..d3299261 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -493,7 +493,6 @@ weights(obj::RegressionModel) = weights(obj.model) weights(m::LinPredModel) = weights(m.rr) weights(pp::LinPred) = pp.wts -isweighted(obj::RegressionModel) = isweighted(obj.model.pp) isweighted(m::LinPredModel) = isweighted(m.pp) isweighted(pp::LinPred) = weights(pp) isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights}