ftest improvements (#337)

Allow passing models from simpler to more complex, giving equivalent tests but with reversed signs for ΔDOF, ΔSSR and ΔR². Deprecate passing only one model, which makes no test at all. Stop printing residual degrees of freedom, as DOF and difference in DOF seems enough. Print two spaces instead of one between columns to make table more readable, and add horizontal lines to match the CoefTable printing. Make struct type stable by filling tuples with an initial NaN entry when needed. Make tests cover more situations by having a model with ΔDOF of 2. Small code improvements.
JuliaStats · Oct 17, 2019 · c97277b · c97277b
1 parent 03220f7
commit c97277b
Show file tree

Hide file tree

Showing 2 changed files with 141 additions and 73 deletions.
diff --git a/src/ftest.jl b/src/ftest.jl
@@ -1,14 +1,14 @@
 mutable struct FTestResult{N}
+    nobs::Int
     ssr::NTuple{N, Float64}
     dof::NTuple{N, Int}
-    dof_resid::NTuple{N, Int}
     r2::NTuple{N, Float64}
-    fstat::Tuple{Vararg{Float64}}
-    pval::Tuple{Vararg{PValue}}
+    fstat::NTuple{N, Float64}
+    pval::NTuple{N, Float64}
 end
 
 """A helper function to determine if mod1 is nested in mod2"""
-function issubmodel(mod1::LinPredModel, mod2::LinPredModel; atol=0::Real)
+function issubmodel(mod1::LinPredModel, mod2::LinPredModel; atol::Real=0.0)
     mod1.rr.y != mod2.rr.y && return false # Response variables must be equal
 
     # Test that models are nested
@@ -28,15 +28,14 @@ _diffn(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i]-t[i+1], N-1)
 
 _diff(t::NTuple{N, T}) where {N, T} = ntuple(i->t[i+1]-t[i], N-1)
 
-dividetuples(t1::NTuple{N}, t2::NTuple{N}) where {N} = ntuple(i->t1[i]/t2[i], N)
-
 """
-    ftest(mod::LinearModel...; atol=0::Real)
+    ftest(mod::LinearModel...; atol::Real=0.0)
 
-For each sequential pair of linear predictors in `mod`, perform an F-test to determine if
-the first one fits significantly better than the next.
+For each sequential pair of linear models in `mod...`, perform an F-test to determine if
+the one model fits significantly better than the other. Models must have been fitted
+on the same data, and be nested either in forward or backward direction.
 
-A table is returned containing residual degrees of freedom (DOF), degrees of freedom,
+A table is returned containing consumed degrees of freedom (DOF),
 difference in DOF from the preceding model, sum of squared residuals (SSR), difference in
 SSR from the preceding model, R², difference in R² from the preceding model, and F-statistic
 and p-value for the comparison between the two models.
@@ -55,77 +54,108 @@ this is an ANOVA, our null hypothesis is that `Result ~ 1` fits the data as well
 `Result ~ 1 + Treatment`.
 
 ```jldoctest
-julia> dat = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
-                       Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
-                       Other=[1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]);
-
-julia> model = lm(@formula(Result ~ 1 + Treatment), dat);
+julia> dat = DataFrame(Result=[1.1, 1.2, 1, 2.2, 1.9, 2, 0.9, 1, 1, 2.2, 2, 2],
+                       Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2],
+                       Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]));
 
 julia> nullmodel = lm(@formula(Result ~ 1), dat);
 
-julia> bigmodel = lm(@formula(Result ~ 1 + Treatment + Other), dat);
-
-julia> ft = ftest(model.model, nullmodel.model)
-        Res. DOF DOF ΔDOF    SSR    ΔSSR     R²    ΔR²       F* p(>F)
-Model 1       10   3      0.1283         0.9603
-Model 2       11   2   -1 3.2292 -3.1008 0.0000 0.9603 241.6234 <1e-7
+julia> model = lm(@formula(Result ~ 1 + Treatment), dat);
 
+julia> bigmodel = lm(@formula(Result ~ 1 + Treatment + Other), dat);
 
-julia> ftest(bigmodel.model, model.model, nullmodel.model)
-        Res. DOF DOF ΔDOF    SSR    ΔSSR     R²    ΔR²       F*  p(>F)
-Model 1        9   4      0.1038         0.9678
-Model 2       10   3   -1 0.1283 -0.0245 0.9603 0.0076   2.1236 0.1790
-Model 3       11   2   -1 3.2292 -3.1008 0.0000 0.9603 241.6234  <1e-7
+julia> ftest(nullmodel.model, model.model)
+F-test: 2 models fitted on 12 observations
+─────────────────────────────────────────────────────────────────
+     DOF  ΔDOF     SSR     ΔSSR       R²     ΔR²        F*  p(>F)
+─────────────────────────────────────────────────────────────────
+[1]    2        3.2292           -0.0000                         
+[2]    3     1  0.1283  -3.1008   0.9603  0.9603  241.6234  <1e-7
+─────────────────────────────────────────────────────────────────
+
+julia> ftest(nullmodel.model, model.model, bigmodel.model)
+F-test: 3 models fitted on 12 observations
+──────────────────────────────────────────────────────────────────
+     DOF  ΔDOF     SSR     ΔSSR       R²     ΔR²        F*   p(>F)
+──────────────────────────────────────────────────────────────────
+[1]    2        3.2292           -0.0000                          
+[2]    3     1  0.1283  -3.1008   0.9603  0.9603  241.6234   <1e-7
+[3]    5     2  0.1017  -0.0266   0.9685  0.0082    1.0456  0.3950
+──────────────────────────────────────────────────────────────────
 ```
 """
-function ftest(mods::LinearModel...; atol=0::Real)
-    nmodels = length(mods)
-    for i in 2:nmodels
-        issubmodel(mods[i], mods[i-1], atol=atol) ||
-        throw(ArgumentError("F test $i is only valid if model $i is nested in model $(i-1)"))
+function ftest(mods::LinearModel...; atol::Real=0.0)
+    if length(mods) < 2
+        Base.depwarn("Passing less than two models to ftest is deprecated", :ftest)
+    end
+    if !all(==(nobs(mods[1])), nobs.(mods))
+        throw(ArgumentError("F test is only valid for models fitted on the same data, " *
+                            "but number of observations differ"))
+    end
+    forward = length(mods) == 1 || dof(mods[1]) <= dof(mods[2])
+    if forward
+        for i in 2:length(mods)
+            if dof(mods[i-1]) >= dof(mods[i]) || !issubmodel(mods[i-1], mods[i], atol=atol)
+                throw(ArgumentError("F test is only valid for nested models"))
+            end
+        end
+    else
+        for i in 2:length(mods)
+            if dof(mods[i]) >= dof(mods[i-1]) || !issubmodel(mods[i], mods[i-1], atol=atol)
+                throw(ArgumentError("F test is only valid for nested models"))
+            end
+        end
     end
 
     SSR = deviance.(mods)
 
-    nparams = dof.(mods)
-
-    df2 = _diffn(nparams)
-    df1 = Int.(dof_residual.(mods))
-
-    MSR1 = dividetuples(_diff(SSR), df2)
-    MSR2 = dividetuples(SSR, df1)[1:nmodels-1]
+    df = dof.(mods)
+    Δdf = _diff(df)
+    dfr = Int.(dof_residual.(mods))
+    MSR1 = _diffn(SSR) ./ Δdf
+    MSR2 = (SSR ./ dfr)
+    if forward
+        MSR2 = MSR2[2:end]
+        dfr_big = dfr[2:end]
+    else
+        MSR2 = MSR2[1:end-1]
+        dfr_big = dfr[1:end-1]
+    end
 
-    fstat = dividetuples(MSR1, MSR2)
-    pval = PValue.(ccdf.(FDist.(df2, df1[1:nmodels-1]), fstat))
-    return FTestResult(SSR, nparams, df1, r2.(mods), fstat, pval)
+    fstat = (NaN, (MSR1 ./ MSR2)...)
+    pval = (NaN, ccdf.(FDist.(abs.(Δdf), dfr_big), abs.(fstat[2:end]))...)
+    return FTestResult(Int(nobs(mods[1])), SSR, df, r2.(mods), fstat, pval)
 end
 
 function show(io::IO, ftr::FTestResult{N}) where N
-    Δdof = _diffn(ftr.dof_resid)
-    Δssr = _diffn(ftr.ssr)
-    ΔR² = _diffn(ftr.r2)
+    Δdof = _diff(ftr.dof)
+    Δssr = _diff(ftr.ssr)
+    ΔR² = _diff(ftr.r2)
 
-    nc = 10
+    nc = 9
     nr = N
     outrows = Matrix{String}(undef, nr+1, nc)
 
-    outrows[1, :] = ["", "Res. DOF", "DOF", "ΔDOF", "SSR", "ΔSSR",
+    outrows[1, :] = ["", "DOF", "ΔDOF", "SSR", "ΔSSR",
                      "R²", "ΔR²", "F*", "p(>F)"]
 
-    outrows[2, :] = ["Model 1", @sprintf("%.0d", ftr.dof_resid[1]),
-                     @sprintf("%.0d", ftr.dof[1]), " ",
+    outrows[2, :] = ["[1]", @sprintf("%.0d", ftr.dof[1]), " ",
                      @sprintf("%.4f", ftr.ssr[1]), " ",
                      @sprintf("%.4f", ftr.r2[1]), " ", " ", " "]
 
     for i in 2:nr
-        outrows[i+1, :] = ["Model $i", @sprintf("%.0d", ftr.dof_resid[i]),
+        outrows[i+1, :] = ["[$i]",
                            @sprintf("%.0d", ftr.dof[i]), @sprintf("%.0d", Δdof[i-1]),
                            @sprintf("%.4f", ftr.ssr[i]), @sprintf("%.4f", Δssr[i-1]),
                            @sprintf("%.4f", ftr.r2[i]), @sprintf("%.4f", ΔR²[i-1]),
-                           @sprintf("%.4f", ftr.fstat[i-1]), string(ftr.pval[i-1]) ]
+                           @sprintf("%.4f", ftr.fstat[i]), string(PValue(ftr.pval[i])) ]
     end
     colwidths = length.(outrows)
     max_colwidths = [maximum(view(colwidths, :, i)) for i in 1:nc]
+    totwidth = sum(max_colwidths) + 2*8
+
+    println(io, "F-test: $N models fitted on $(ftr.nobs) observations")
+    println(io, '─'^totwidth)
 
     for r in 1:nr+1
         for c in 1:nc
@@ -134,12 +164,14 @@ function show(io::IO, ftr::FTestResult{N}) where N
 
             padding = " "^(max_colwidths[c]-cur_cell_len)
             if c > 1
-                padding = " "*padding
+                padding = "  "*padding
             end
 
             print(io, padding)
             print(io, cur_cell)
         end
         print(io, "\n")
+        r == 1 && println(io, '─'^totwidth)
     end
+    print(io, '─'^totwidth)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -472,7 +472,7 @@ end
 @testset "F test for model comparison" begin
     d = DataFrame(Treatment=[1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2.],
                   Result=[1.1, 1.2, 1, 2.2, 1.9, 2, .9, 1, 1, 2.2, 2, 2],
-                  Other=[1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1])
+                  Other=categorical([1, 1, 2, 1, 2, 1, 3, 1, 1, 2, 2, 1]))
     mod = lm(@formula(Result~Treatment), d).model
     othermod = lm(@formula(Result~Other), d).model
     nullmod = lm(@formula(Result~1), d).model
@@ -483,32 +483,68 @@ end
     @test !GLM.issubmodel(bothmod, mod)
     @test GLM.issubmodel(othermod, bothmod)
 
-    @test_throws ArgumentError ftest(mod, othermod)
-
-    d[:Sum] = d[:Treatment] + d[:Other]
+    d[:Sum] = d[:Treatment] + (d[:Other] .== 1)
     summod = lm(@formula(Result~Sum), d).model
     @test GLM.issubmodel(summod, bothmod)
 
-    ft = ftest(mod, nullmod)
-    @test isapprox(ft.pval[1].v,  2.481215056713184e-8)
-    @test sprint(show, ftest(mod, nullmod)) ==
-        """
-                Res. DOF DOF ΔDOF    SSR    ΔSSR      R²    ΔR²       F* p(>F)
-        Model 1       10   3      0.1283          0.9603                      
-        Model 2       11   2   -1 3.2292 -3.1008 -0.0000 0.9603 241.6234 <1e-7
-        """
+    ft1a = ftest(mod, nullmod)
+    @test isnan(ft1a.pval[1])
+    @test ft1a.pval[2] ≈ 2.481215056713184e-8
+    @test sprint(show, ft1a) == """
+        F-test: 2 models fitted on 12 observations
+        ─────────────────────────────────────────────────────────────────
+             DOF  ΔDOF     SSR    ΔSSR       R²      ΔR²        F*  p(>F)
+        ─────────────────────────────────────────────────────────────────
+        [1]    3        0.1283           0.9603                          
+        [2]    2    -1  3.2292  3.1008  -0.0000  -0.9603  241.6234  <1e-7
+        ─────────────────────────────────────────────────────────────────"""
+
+    ft1b = ftest(nullmod, mod)
+    @test isnan(ft1b.pval[1])
+    @test ft1b.pval[2] ≈ 2.481215056713184e-8
+    @test sprint(show, ft1b) == """
+        F-test: 2 models fitted on 12 observations
+        ─────────────────────────────────────────────────────────────────
+             DOF  ΔDOF     SSR     ΔSSR       R²     ΔR²        F*  p(>F)
+        ─────────────────────────────────────────────────────────────────
+        [1]    2        3.2292           -0.0000                         
+        [2]    3     1  0.1283  -3.1008   0.9603  0.9603  241.6234  <1e-7
+        ─────────────────────────────────────────────────────────────────"""
 
     bigmod = lm(@formula(Result~Treatment+Other), d).model
-    ft2 = ftest(bigmod, mod, nullmod)
-    @test isapprox(ft2.pval[2].v,  2.481215056713184e-8)
-    @test isapprox(ft2.pval[1].v, 0.17903437900958952)
-    @test sprint(show, ftest(bigmod, mod, nullmod)) ==
-        """
-                Res. DOF DOF ΔDOF    SSR    ΔSSR      R²    ΔR²       F*  p(>F)
-        Model 1        9   4      0.1038          0.9678                       
-        Model 2       10   3   -1 0.1283 -0.0245  0.9603 0.0076   2.1236 0.1790
-        Model 3       11   2   -1 3.2292 -3.1008 -0.0000 0.9603 241.6234  <1e-7
-        """
+    ft2a = ftest(nullmod, mod, bigmod)
+    @test isnan(ft2a.pval[1])
+    @test ft2a.pval[2] ≈ 2.481215056713184e-8
+    @test ft2a.pval[3] ≈ 0.3949973540194818
+    @test sprint(show, ft2a) == """
+        F-test: 3 models fitted on 12 observations
+        ──────────────────────────────────────────────────────────────────
+             DOF  ΔDOF     SSR     ΔSSR       R²     ΔR²        F*   p(>F)
+        ──────────────────────────────────────────────────────────────────
+        [1]    2        3.2292           -0.0000                          
+        [2]    3     1  0.1283  -3.1008   0.9603  0.9603  241.6234   <1e-7
+        [3]    5     2  0.1017  -0.0266   0.9685  0.0082    1.0456  0.3950
+        ──────────────────────────────────────────────────────────────────"""
+
+    ft2b = ftest(bigmod, mod, nullmod)
+    @test isnan(ft2b.pval[1])
+    @test ft2b.pval[2] ≈ 0.3949973540194818
+    @test ft2b.pval[3] ≈ 2.481215056713184e-8
+    @test sprint(show, ft2b) == """
+        F-test: 3 models fitted on 12 observations
+        ──────────────────────────────────────────────────────────────────
+             DOF  ΔDOF     SSR    ΔSSR       R²      ΔR²        F*   p(>F)
+        ──────────────────────────────────────────────────────────────────
+        [1]    5        0.1017           0.9685                           
+        [2]    3    -2  0.1283  0.0266   0.9603  -0.0082    1.0456  0.3950
+        [3]    2    -1  3.2292  3.1008  -0.0000  -0.9603  241.6234   <1e-7
+        ──────────────────────────────────────────────────────────────────"""
+
+    @test_throws ArgumentError ftest(mod, bigmod, nullmod)
+    @test_throws ArgumentError ftest(nullmod, bigmod, mod)
+    @test_throws ArgumentError ftest(bigmod, nullmod, mod)
+    mod2 = lm(@formula(Result~Treatment), d[2:end, :]).model
+    @test_throws ArgumentError ftest(mod, mod2)
 end
 
 @testset "F test rounding error" begin