Skip to content

Commit

Permalink
rework passing and handling of FE coefficients to simulate! for ran…
Browse files Browse the repository at this point in the history
…k deficient models (#778)

* rework passing and handling of FE coefficients to `simulate`! for rank deficient models

* add test

* NEWS

* version bump
  • Loading branch information
palday authored Aug 2, 2024
1 parent 918457f commit 19b90aa
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 25 deletions.
7 changes: 6 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
MixedModels v4.25.3 Release Notes
==============================
- Fix a bug in the handling of rank deficiency in the `simulate[!]` code. This has important correctness implications for bootstrapping models with rank-deficient fixed effects (as can happen in the case of partial crossing of the fixed effects / missing cells). [#778]

MixedModels v4.25.2 Release Notes
==============================
- Use `public` keyword so that users don't see unnecessary docstring warnings on 1.11+. [#776]
- Fix accidental export of `dataset` and `datasets` and make them `public` instead. [#776]

MixedModels v4.25.1 Release Notes
==============================
- Use more sophisticated checks on property names in `restoreoptsum` to allow for optsums saved by pre-v4.25 versions to be used with this version and later. [#775]
- Use more sophisticated checks on property names in `restoreoptsum` to allow for optsums saved by pre-v4.25 versions to be used with this version and later. [#775]

MixedModels v4.25 Release Notes
==============================
Expand Down Expand Up @@ -549,3 +553,4 @@ Package dependencies
[#773]: https://github.com/JuliaStats/MixedModels.jl/issues/773
[#775]: https://github.com/JuliaStats/MixedModels.jl/issues/775
[#776]: https://github.com/JuliaStats/MixedModels.jl/issues/776
[#778]: https://github.com/JuliaStats/MixedModels.jl/issues/778
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "MixedModels"
uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
author = ["Phillip Alday <[email protected]>", "Douglas Bates <[email protected]>", "Jose Bayoan Santiago Calderon <[email protected]>"]
version = "4.25.2"
version = "4.25.3"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
15 changes: 9 additions & 6 deletions src/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ end

"""
parametricbootstrap([rng::AbstractRNG], nsamp::Integer, m::MixedModel{T}, ftype=T;
β = coef(m), σ = m.σ, θ = m.θ, progress=true, optsum_overrides=(;))
β = fixef(m), σ = m.σ, θ = m.θ, progress=true, optsum_overrides=(;))
Perform `nsamp` parametric bootstrap replication fits of `m`, returning a `MixedModelBootstrap`.
Expand Down Expand Up @@ -214,7 +214,7 @@ function parametricbootstrap(
n::Integer,
morig::MixedModel{T},
ftype::Type{<:AbstractFloat}=T;
β::AbstractVector=coef(morig),
β::AbstractVector=fixef(morig),
σ=morig.σ,
θ::AbstractVector=morig.θ,
use_threads::Bool=false,
Expand All @@ -232,9 +232,13 @@ function parametricbootstrap(
if σ !== missing
σ = T(σ)
end
β, θ = convert(Vector{T}, β), convert(Vector{T}, θ)
βsc, θsc = similar(ftype.(β)), similar(ftype.(θ))
p, k = length(β), length(θ)
β = convert(Vector{T}, β)
θ = convert(Vector{T}, θ)
# scratch -- note that this is the length of the unpivoted coef vector
βsc = coef(morig)
θsc = zeros(ftype, length(θ))
p = length(βsc)
k = length(θsc)
m = deepcopy(morig)
for (key, val) in pairs(optsum_overrides)
setfield!(m.optsum, key, val)
Expand All @@ -251,7 +255,6 @@ function parametricbootstrap(
samp = replicate(n; progress) do
simulate!(rng, m; β, σ, θ)
refit!(m; progress=false)
# @info "" m.optsum.feval
(
objective=ftype.(m.objective),
σ=ismissing(m.σ) ? missing : ftype(m.σ),
Expand Down
34 changes: 18 additions & 16 deletions src/simulate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,33 @@ function simulate(m::MixedModel, args...; kwargs...)
end

"""
simulate!(rng::AbstractRNG, m::MixedModel{T}; β=m.β, σ=m.σ, θ=T[])
simulate!(m::MixedModel; β=m.β, σ=m.σ, θ=m.θ)
simulate!(rng::AbstractRNG, m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[])
simulate!(m::MixedModel; β=fixef(m), σ=m.σ, θ=m.θ)
Overwrite the response (i.e. `m.trms[end]`) with a simulated response vector from model `m`.
This simulation includes sampling new values for the random effects.
`β` can be specified either as a pivoted, full rank coefficient vector (cf. [`fixef`](@ref))
or as an unpivoted full dimension coefficient vector (cf. [`coef`](@ref)), where the entries
corresponding to redundant columns will be ignored.
!!! note
Note that `simulate!` methods with a `y::AbstractVector` as the first argument
(besides the RNG) and `simulate` methods return the simulated response. This is
in contrast to `simulate!` methods with a `m::MixedModel` as the first argument,
which modify the model's response and return the entire modified model.
"""
function simulate!(
rng::AbstractRNG, m::LinearMixedModel{T}; β=coef(m), σ=m.σ, θ=T[]
rng::AbstractRNG, m::LinearMixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]
) where {T}
# XXX should we add support for doing something with weights?
simulate!(rng, m.y, m; β, σ, θ)
return unfit!(m)
end

function simulate!(
rng::AbstractRNG, m::GeneralizedLinearMixedModel{T}; β=coef(m), σ=m.σ, θ=T[]
rng::AbstractRNG, m::GeneralizedLinearMixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]
) where {T}
# note that these m.resp.y and m.LMM.y will later be synchronized in (re)fit!()
# but for now we use them as distinct scratch buffers to avoid allocations
Expand Down Expand Up @@ -85,7 +89,7 @@ function _rand(rng::AbstractRNG, d::Distribution, location, scale=NaN, n=1)
return rand(rng, dist) / n
end

function simulate!(m::MixedModel{T}; β=coef(m), σ=m.σ, θ=T[]) where {T}
function simulate!(m::MixedModel{T}; β=fixef(m), σ=m.σ, θ=T[]) where {T}
return simulate!(Random.GLOBAL_RNG, m; β, σ, θ)
end

Expand Down Expand Up @@ -121,7 +125,7 @@ function simulate!(
y::AbstractVector,
m::LinearMixedModel,
newdata::Tables.ColumnTable;
β=m.β,
β=fixef(m),
σ=m.σ,
θ=m.θ,
)
Expand All @@ -147,20 +151,18 @@ function simulate!(
end

function simulate!(
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=m.β, σ=m.σ, θ=m.θ
rng::AbstractRNG, y::AbstractVector, m::LinearMixedModel{T}; β=fixef(m), σ=m.σ, θ=m.θ
) where {T}
length(β) == length(pivot(m)) || length(β) == m.feterm.rank ||
length(β) == length(pivot(m)) || length(β) == rank(m) ||
throw(ArgumentError("You must specify all (non-singular) βs"))

β = convert(Vector{T}, β)
σ = T(σ)
θ = convert(Vector{T}, θ)
isempty(θ) || setθ!(m, θ)

if length(β) length(pivot(m))
padding = length(pivot(m)) - rank(m)
append!(β, fill(-0.0, padding))
invpermute!(β, pivot(m))
if length(β) == length(pivot(m))
β = view(view(β, pivot(m)), 1:rank(m))
end

# initialize y to standard normal
Expand All @@ -172,15 +174,15 @@ function simulate!(
end

# scale by σ and add fixed-effects contribution
return mul!(y, m.X, β, one(T), σ)
return mul!(y, fullrankx(m), β, one(T), σ)
end

function simulate!(
rng::AbstractRNG,
y::AbstractVector,
m::GeneralizedLinearMixedModel,
newdata::Tables.ColumnTable;
β=m.β,
β=fixef(m),
σ=m.σ,
θ=m.θ,
)
Expand Down Expand Up @@ -209,7 +211,7 @@ function simulate!(
rng::AbstractRNG,
y::AbstractVector,
m::GeneralizedLinearMixedModel{T};
β=m.β,
β=fixef(m),
σ=m.σ,
θ=m.θ,
) where {T}
Expand Down Expand Up @@ -250,7 +252,7 @@ function _simulate!(

if length(β) == length(pivot(m))
# unlike LMM, GLMM stores the truncated, pivoted vector directly
β = view(β, view(pivot(m), 1:(rank(m))))
β = view(view(β, pivot(m)), 1:rank(m))
end
fast = (length(m.θ) == length(m.optsum.final))
setpar! = fast ? setθ! : setβθ!
Expand Down
29 changes: 28 additions & 1 deletion test/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ end
rng = MersenneTwister(0);
x = rand(rng, 100);
data = (x = x, x2 = 1.5 .* x, y = rand(rng, [0,1], 100), z = repeat('A':'T', 5))
@testset "$family" for family in [Normal(), Bernoulli()]
@testset "$family" for family in [Normal(), Bernoulli()]
model = @suppress fit(MixedModel, @formula(y ~ x + x2 + (1|z)), data, family; progress=false)
boot = quickboot(model, 10)

Expand All @@ -242,6 +242,33 @@ end
yf = simulate(StableRNG(1), model; β=fixef(model))
@test all(x -> isapprox(x...), zip(yc, yf))
end

@testset "partial crossing" begin
id = lpad.(string.(1:40), 2, "0")
B = ["b0", "b1", "b2"]
C = ["c0", "c1", "c2", "c3", "c4"]
df = DataFrame(reshape(collect(Iterators.product(B, C, id)), :), [:b, :c, :id])
df[!, :y] .= 0
filter!(df) do row
b = last(row.b)
c = last(row.c)
return b != c
end

m = LinearMixedModel(@formula(y ~ 1 + b * c + (1|id)), df)
β = 1:rank(m)
σ = 1
simulate!(StableRNG(628), m; β, σ)
fit!(m)

boot = parametricbootstrap(StableRNG(271828), 1000, m);
bootci = DataFrame(shortestcovint(boot))
filter!(:group => ismissing, bootci)
select!(bootci, :names => disallowmissing => :coef, :lower, :upper)
transform!(bootci, [:lower, :upper] => ByRow(middle) => :mean)

@test all(x -> isapprox(x[1], x[2]; atol=0.1), zip(coef(m), bootci.mean))
end
end
end

Expand Down

2 comments on commit 19b90aa

@palday
Copy link
Member Author

@palday palday commented on 19b90aa Aug 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/112308

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v4.25.3 -m "<description of version>" 19b90aa0384100fa9c3a38db631afbb89287b848
git push origin v4.25.3

Please sign in to comment.