Merge branch 'main' into pa/ls_ci

beacon-biosignals · Dec 11, 2024 · 80f6dea · 80f6dea
2 parents 36197e0 + f443002
commit 80f6dea
Show file tree

Hide file tree

Showing 11 changed files with 175 additions and 36 deletions.
diff --git a/.github/workflows/YASG.yml b/.github/workflows/YASG.yml
@@ -16,22 +16,26 @@ on:
       - 'format/**'
 jobs:
   format-check:
-    name: YASG Enforcement (Julia ${{ matrix.julia-version }} - ${{ github.event_name }})
+    name: YASG Enforcement (Julia ${{ matrix.julia-version }})
     # Run on push's or non-draft PRs
     if: (github.event_name == 'push') || (github.event.pull_request.draft == false)
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        julia-version: [1.6]
+        julia-version: [1.9]
     steps:
       - uses: julia-actions/setup-julia@latest
         with:
           version: ${{ matrix.julia-version }}
       - uses: actions/checkout@v4
+      - name: Cache
+        uses: julia-actions/cache@v2
+        with:
+            cache-compiled: "true"
       - name: Instantiate `format` environment and format
         run: |
-          julia --project=format -e 'using Pkg; Pkg.instantiate()'
-          julia --project=format 'format/run.jl'
+          julia --project=format -O0 -e 'using Pkg; Pkg.instantiate()'
+          julia --project=format -O0 'format/run.jl'
       - uses: reviewdog/action-suggester@v1
         if: github.event_name == 'pull_request'
         with:

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,40 +12,34 @@ on:
         - main
 jobs:
   test:
-    name: Julia ${{ matrix.version }} - StatsModels ${{ matrix.statsmodels }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
     runs-on: ${{ matrix.os }}
     strategy:
       fail-fast: false
       matrix:
         version:
           - '1'
-          - '1.6'
+          - 'min'
           - 'nightly'
-        statsmodels:
-          - '0.6'
-          - '0.7'
         os:
           - ubuntu-latest
         arch:
           - x64
           # - x86
     steps:
       - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@v1
+      - uses: julia-actions/setup-julia@v2
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - name: "Install StatsModels"
-        shell: julia --color=yes --project {0}
-        run: |
-          using Pkg
-          Pkg.add(Pkg.PackageSpec(; name="StatsModels", version="${{ matrix.statsmodels }}"))
-      - uses: julia-actions/cache@v1
+      - name: Cache
+        uses: julia-actions/cache@v2
+        with:
+            cache-compiled: "true"
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1
-      - uses: codecov/codecov-action@v4
+      - uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e # v5.1.1
         with:
           files: lcov.info
           token: ${{ secrets.CODECOV_TOKEN }}
-
diff --git a/.github/workflows/documenter.yml b/.github/workflows/documenter.yml
@@ -12,9 +12,13 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
       - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@v1
+      - uses: julia-actions/setup-julia@v2
         with:
-          version: 1.6
+          version: 1
+      - name: Cache
+        uses: julia-actions/cache@v2
+        with:
+            cache-compiled: "true"
       - uses: julia-actions/julia-buildpkg@latest
       - uses: julia-actions/julia-docdeploy@latest
         env:

diff --git a/.github/workflows/spellcheck.yml b/.github/workflows/spellcheck.yml
@@ -11,7 +11,7 @@ jobs:
       - name: Checkout Actions Repository
         uses: actions/checkout@v4
       - name: Check spelling
-        uses: crate-ci/typos@master
+        uses: crate-ci/typos@2872c382bb9668d4baa5eade234dcbc0048ca2cf # v1.28.2
         with:
             config: _typos.toml
             write_changes: true

diff --git a/.gitignore b/.gitignore
@@ -23,3 +23,5 @@ docs/site/
 # environment.
 Manifest.toml
 Manifest-v*.toml
+coverage/lcov.info
+*.cov
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ Effects Prediction for Linear and Generalized Linear models
 [![codecov](https://codecov.io/gh/beacon-biosignals/Effects.jl/branch/main/graph/badge.svg?token=AAK9265TXH)](https://codecov.io/gh/beacon-biosignals/Effects.jl)
 [![DOI](https://zenodo.org/badge/323392527.svg)](https://zenodo.org/badge/latestdoi/323392527)
 
-[build-img]: https://github.com/beacon-biosignals/Effects.jl/workflows/CI/badge.svg
+[build-img]: https://github.com/beacon-biosignals/Effects.jl/actions/workflows/ci.yml/badge.svg
 [build-url]: https://github.com/beacon-biosignals/Effects.jl/actions
 
 Regression is a foundational technique of statistical analysis, and many common statistical tests are based on regression models (e.g., ANOVA, t-test, correlation tests, etc.).

diff --git a/docs/Project.toml b/docs/Project.toml
@@ -16,9 +16,8 @@ StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [compat]
-AlgebraOfGraphics = "0.4.5"
+AlgebraOfGraphics = "0.6"
 DataFrames = "1"
 Documenter = "1.3"
 GLM = "1.3"
-StatsModels = "0.6.20"
 julia = "1.6"
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -121,8 +121,8 @@ refgrid[!, :lower] = @. refgrid.weight - 1.96 * refgrid.err
 refgrid[!, :upper] = @. refgrid.weight + 1.96 * refgrid.err
 sort!(refgrid, [:age])
 
-plt = data(refgrid) * mapping(:age, :weight; lower=:lower, upper=:upper, color=:sex) *
-      (visual(Lines) + visual(LinesFill))
+plt = data(refgrid) * mapping(:age, :weight; color=:sex) *
+      (visual(Lines) + mapping(; lower=:lower, upper=:upper) * visual(LinesFill))
 draw(plt)
 ```
 
@@ -149,8 +149,8 @@ refgrid_centered[!, :lower] = @. refgrid_centered.weight - 1.96 * refgrid_center
 refgrid_centered[!, :upper] = @. refgrid_centered.weight + 1.96 * refgrid_centered.err
 sort!(refgrid_centered, [:age])
 
-plt = data(refgrid_centered) * mapping(:age, :weight; lower=:lower, upper=:upper, color=:sex) *
-      (visual(Lines) + visual(LinesFill))
+plt = data(refgrid_centered) * mapping(:age, :weight; color=:sex) *
+      (visual(Lines) + mapping(; lower=:lower, upper=:upper) * visual(LinesFill))
 draw(plt)
 ```
 

diff --git a/ext/EffectsMixedModelsExt.jl b/ext/EffectsMixedModelsExt.jl
@@ -5,10 +5,11 @@ using Effects
 using MixedModels
 using StatsBase
 
-using Effects: typify, _difference_method!, _responsename
+using Effects: typify, _difference_method!,
+               _responsename, _model_link
 using LinearAlgebra: diag
 using StatsModels: modelcols
-using GLM: Link
+using GLM: GLM, Link
 
 Effects._model_link(m::GeneralizedLinearMixedModel, ::AutoInvLink) = Link(m)
 
@@ -29,4 +30,92 @@ function Effects.effects!(reference_grid::DataFrame, model::MixedModel;
     return reference_grid
 end
 
+_invlink(model::MixedModel, invlink) = Base.Fix1(GLM.linkinv, _model_link(model, invlink))
+_invlink(::MixedModel, ::typeof(identity)) = identity
+
+"""
+    effects!(reference_grid::DataFrame, model::MixedModel, boot::MixedModelBootstrap;
+             eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
+             lower_col=:lower, upper_col=:upper, level=nothing)
+
+Use the results of a bootstrap to compute empirical error estimates
+(instead of using the variance-covariance matrix).
+
+!!! warning
+    This method is experimental and may change in its defaults or
+    disappear entirely in a future release!
+"""
+function Effects.effects!(reference_grid::DataFrame, model::MixedModel,
+                          boot::MixedModelBootstrap;
+                          eff_col=nothing, err_col=:err, typical=mean, invlink=identity,
+                          lower_col=:lower, upper_col=:upper,
+                          level=nothing)
+    # right now this is written for a RegressionModel and implicitly assumes
+    # the existence of an appropriate formula method
+    form = formula(model)
+    form_typical = typify(reference_grid, form, modelmatrix(model); typical=typical)
+    # we don't need to worry about pivoting / fixef rank deficiency here
+    # because the default -0.0 placeholder coefs still yield the correct predictions
+    # it's the NaNs in the vcov that create problems, but this method sidesteps
+    # the vcov computation, so we're all good!
+    X = modelcols(form_typical, reference_grid)
+    eff = X * coef(model)
+    # each row is a bootstrap replicate
+    # each column is a different row of X
+    # this seems like a weird way to store things, until you remember
+    # that we aggregate across replicates and thus want to take advantage
+    # of column major storage
+    boot_err = mapreduce(vcat, groupby(DataFrame(boot.β), :iter)) do gdf
+        β = gdf[!, :β]
+        return (X * β)'
+    end
+
+    err = map(std, eachcol(boot_err))
+    _difference_method!(eff, err, model, invlink)
+    reference_grid[!, something(eff_col, _responsename(model))] = eff
+    reference_grid[!, err_col] = err
+
+    # logic here is slightly different than for other methods
+    # because we can compute empirical CIs instead of relying on a Wald
+    # approximation
+    if !isnothing(level)
+        lower_tail = (1 - level) / 2
+        upper_tail = 1 - lower_tail
+
+        ci = map(eachcol(boot_err)) do col
+            return quantile(col, [lower_tail, upper_tail])
+        end
+
+        invlink = _invlink(model, invlink)
+        reference_grid[!, lower_col] = invlink.(first.(ci))
+        reference_grid[!, upper_col] = invlink.(last.(ci))
+    end
+    return reference_grid
+end
+
+"""
+    effects(design::AbstractDict, model::MixedModel, boot::MixedModelBootstrap;
+            eff_col=nothing, err_col=:err, typical=mean,
+            lower_col=:lower, upper_col=:upper, invlink=identity,
+            level=nothing)
+
+Use the results of a bootstrap to compute empirical error estimates
+(instead of using the variance-covariance matrix).
+
+!!! warning
+    This method is experimental and may change in its defaults or
+    disappear entirely in a future release!
+"""
+function Effects.effects(design::AbstractDict, model::MixedModel, boot::MixedModelBootstrap;
+                         eff_col=nothing, err_col=:err, typical=mean,
+                         lower_col=:lower, upper_col=:upper, invlink=identity,
+                         level=nothing)
+    grid = expand_grid(design)
+    dv = something(eff_col, _responsename(model))
+    level = isnothing(level) ? 0.68 : level
+    effects!(grid, model, boot; eff_col=dv, err_col, typical, invlink, level, lower_col,
+             upper_col)
+    return grid
+end
+
 end # module
diff --git a/src/regressionmodel.jl b/src/regressionmodel.jl
@@ -10,7 +10,7 @@ determined from the model type.
 !!! compat "Julia 1.9"
     Automatic inverse link determination is implemented using package
     extensions, which are available beginning in Julia 1.9.
-    
+
 An error is thrown if the inverse link cannot be determined. This will
 always occur with Julia versions prior to 1.9, and will otherwise occur
 when no extension has been loaded that specifies the link function for
@@ -143,7 +143,7 @@ _invlink_and_deriv(invlink, η) = (invlink(η), ForwardDiff.derivative(invlink,
 _invlink_and_deriv(::typeof(identity), η) = (η, 1)
 # this isn't the best name because it sometimes returns the inverse link and sometimes the link (Link())
 # for now, this is private API, but we should see how this goes and whether we can make it public API
-# so local extensions (instead of Package-Extensions) are better supported 
+# so local extensions (instead of Package-Extensions) are better supported
 _model_link(::RegressionModel, invlink::Function) = invlink
 function _model_link(model::RegressionModel, ::AutoInvLink)
     msg = string("cannot automatically determine inverse link for models ",

diff --git a/test/mixedmodels.jl b/test/mixedmodels.jl
@@ -7,23 +7,32 @@ using Test
 
 rng = StableRNG(42)
 x = rand(rng, 100)
-data = (x=x, x2=1.5 .* x, y=rand(rng, [0, 1], 100), z=repeat('A':'T', 5))
-model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1 | z)), data; progress=false)
+a = rand(rng, 100)
+data = (; x, x2=1.5 .* x, a, y=rand(rng, [0, 1], 100), g=repeat('A':'T', 5))
+model = @suppress fit(MixedModel, @formula(y ~ a + x + x2 + (1 | g)), data; progress=false)
 dropped_idx = model.feterm.piv[end]
 dropped_coef = coefnames(model)[dropped_idx]
 kept_coef = last(fixefnames(model))
 
 # due to the vagaries of numerical linear algebra and pivoting, it's
 # not completely deterministic which of x and x2 gets dropped, though it
 # will tend to be x2
-design = Dict(Symbol(kept_coef) => [0])
+design = Dict(Symbol(kept_coef) => [1], :a => [2])
 eff = effects(design, model)
 @test dropped_coef ∉ names(eff)
 @test kept_coef ∈ names(eff)
-@test !isnan(only(eff.err))
+@test all(!isnan, eff.err)
 @test eff.y - eff.err ≈ eff.lower
 @test eff.y + eff.err ≈ eff.upper
 
+β = fixef(model)
+# note that in the design above, we had a = 2
+# β[1] = intercept
+# β[2] = a, multiply by 2 because that's what we put in the design
+# β[3] = whatever of x|x2 wasn't pivoted out, "multiply by 1" (ie do nothing)
+#        because we had keep_coef = 1 in the design
+@test only(eff.y) ≈ β[1] + 2 * β[2] + β[3]
+
 # there is one bit of weirdness -- the removed coefficients behave like any other
 # variable in the "design" that is missing from the model.
 design = Dict(Symbol(dropped_coef) => [0])
@@ -33,3 +42,41 @@ design = Dict(:q => [0])
 eff_not_present = effects(design, model)
 
 @test Matrix(eff_dropped) ≈ Matrix(eff_not_present)
+
+# bootstrap
+design = Dict(Symbol(kept_coef) => [1], :a => [2])
+eff = effects(design, model)
+
+boot = @suppress parametricbootstrap(StableRNG(666), 1000, model)
+bootdf = unstack(DataFrame(boot.β), :coefname, :β)
+# make sure that the bootstrap is correctly zeroing out
+@test all(isequal(-0.0), bootdf[!, dropped_coef])
+
+# now make sure the bootstrap gives approximately the same results
+eff_boot = effects(design, model, boot)
+@test eff_boot.y ≈ eff.y
+@test eff_boot.err ≈ eff.err atol = 0.005
+
+# test intervals and multiple rows in refgrid
+design = Dict(Symbol(kept_coef) => 1:2, :a => [2])
+eff95 = effects(design, model; level=0.95)
+eff_boot95 = effects(design, model, boot; level=0.95)
+@test eff_boot95.y ≈ eff95.y
+@test eff_boot95.lower ≈ eff95.lower atol = 0.1
+@test eff_boot95.upper ≈ eff95.upper atol = 0.1
+
+glmm = @suppress fit(MixedModel,
+                     @formula(use ~ 1 + age + abs2(age) + livch + urban + (1 | dist)),
+                     MixedModels.dataset(:contra),
+                     Bernoulli(); fast=true)
+
+gboot = @suppress parametricbootstrap(StableRNG(345), 250, glmm)
+
+design = Dict(:age => -1:1:1)
+geff_boot = effects(design, glmm, gboot; eff_col="y", invlink=AutoInvLink(), level=0.95)
+geff = effects(design, glmm; eff_col="y", invlink=AutoInvLink(), level=0.95)
+
+@test geff_boot.y ≈ geff.y
+@test geff_boot.err ≈ geff.err atol = 0.05
+@test geff_boot.lower ≈ geff.lower atol = 0.05
+@test geff_boot.upper ≈ geff.upper atol = 0.05