Skip to content

Commit

Permalink
[G]VIF (JuliaStats#548)
Browse files Browse the repository at this point in the history
* [G]VIF

* add reference value source

* more tests

* glm tests
  • Loading branch information
palday authored Sep 14, 2023
1 parent 673e15d commit b1ba4c5
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ SpecialFunctions = "0.6, 0.7, 0.8, 0.9, 0.10, 1, 2.0"
StatsAPI = "1.4"
StatsBase = "0.33.5, 0.34"
StatsFuns = "0.6, 0.7, 0.8, 0.9, 1.0"
StatsModels = "0.6.23, 0.7"
StatsModels = "0.7.3"
Tables = "1"
julia = "1.6"

Expand Down
3 changes: 2 additions & 1 deletion src/GLM.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module GLM
import Base: (\), convert, show, size
import LinearAlgebra: cholesky, cholesky!
import Statistics: cor
using StatsAPI
import StatsBase: coef, coeftable, coefnames, confint, deviance, nulldeviance, dof, dof_residual,
loglikelihood, nullloglikelihood, nobs, stderror, vcov,
residuals, predict, predict!,
Expand All @@ -21,7 +22,7 @@ module GLM
export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
cooksdistance, hasintercept, dispersion
cooksdistance, hasintercept, dispersion, vif, gvif, termnames

export
# types
Expand Down
2 changes: 1 addition & 1 deletion src/linpred.jl
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ fitted(m::LinPredModel) = m.rr.mu
predict(mm::LinPredModel) = fitted(mm)
residuals(obj::LinPredModel) = residuals(obj.rr)

function formula(obj::LinPredModel)
function StatsModels.formula(obj::LinPredModel)
obj.formula === nothing && throw(ArgumentError("model was fitted without a formula"))
return obj.formula
end
Expand Down
43 changes: 43 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2011,3 +2011,46 @@ end
@test_throws ArgumentError lm(@formula(OptDen ~ Carb), form; method=:pr)
@test_throws ArgumentError glm(@formula(OptDen ~ Carb), form, Normal(); method=:pr)
end

@testset "[G]VIF" begin
# Reference values from car::vif in R:
# > library(car)
# > data(Duncan)
# > lm1 = lm(prestige ~ 1 + income + education, Duncan)
# > vif(lm1)
# income education
# 2.1049 2.1049
# > lm2 = lm(prestige ~ 1 + income + education + type, Duncan)
# > vif(lm2)
# GVIF Df GVIF^(1/(2*Df))
# income 2.209178 1 1.486330
# education 5.297584 1 2.301648
# type 5.098592 2 1.502666
duncan = RDatasets.dataset("car", "Duncan")
lm1 = lm(@formula(Prestige ~ 1 + Income + Education), duncan)
@test termnames(lm1)[2] == coefnames(lm1)
@test vif(lm1) gvif(lm1)

lm1_noform = lm(modelmatrix(lm1), response(lm1))
@test vif(lm1) vif(lm1_noform)
@test_throws ArgumentError("model was fitted without a formula") gvif(lm1_noform)

lm1log = lm(@formula(Prestige ~ 1 + exp(log(Income)) + exp(log(Education))), duncan)
@test termnames(lm1log)[2] == coefnames(lm1log) == ["(Intercept)", "exp(log(Income))", "exp(log(Education))"]
@test vif(lm1) vif(lm1log)

gm1 = glm(modelmatrix(lm1), response(lm1), Normal())
@test vif(lm1) vif(gm1)

lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan)
@test termnames(lm2)[2] != coefnames(lm2)
@test gvif(lm2; scale=true) [1.486330, 2.301648, 1.502666] atol=1e-4

gm2 = glm(@formula(Prestige ~ 1 + Income + Education + Type), duncan, Normal())
@test termnames(gm2)[2] != coefnames(gm2)
@test gvif(gm2; scale=true) [1.486330, 2.301648, 1.502666] atol=1e-4

# the VIF definition depends on modelmatrix, vcov and stderror returning valid
# values. It doesn't care about links, offsets, etc. as long as the model matrix,
# vcov matrix and stderrors are well defined.
end

0 comments on commit b1ba4c5

Please sign in to comment.