From f03c65b8434cba111240427a974011b9ca08d4bc Mon Sep 17 00:00:00 2001
From: Phillip Alday
Date: Thu, 7 Nov 2024 22:29:18 -0600
Subject: [PATCH] restore and save optsum for GLMM (#791)

* start work on restoreoptsum for glmm

* unfit! now wipes more state

* restore and save optsum for GLMM

* NEWS, version bump

* Blue Style

* undo change to LMM?

* news tweak

* He's always watching your style

Co-authored-by: Alex Arslan

* in the dark

Co-authored-by: Alex Arslan

* tweaks

Co-authored-by: Alex Arslan

* test update

* :Facepalm:
---
 NEWS.md                            |  6 ++
 Project.toml                       |  2 +-
 src/generalizedlinearmixedmodel.jl |  6 +-
 src/optsummary.jl                  |  6 ++
 src/serialization.jl               | 93 +++++++++++++++++++++++-------
 test/pirls.jl                      | 26 +++++++++
 test/pls.jl                        |  4 +-
 7 files changed, 117 insertions(+), 26 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 84cae83e9..5fc9f88f7 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+MixedModels v4.27.0 Release Notes
+==============================
+- `saveoptsum` and `restoreoptsum!` now support `GeneralizedLinearMixedModel`s [#791]
+- `unfit!(::GeneralizedLinearMixedModel)` (called internally by `refit!`) now does a better job of fully resetting the model state [#791]
+
 MixedModels v4.26.1 Release Notes
 ==============================
 - lower and upper edges of profile confidence intervals for REML-fitted models are no longer flipped [#785]
@@ -569,3 +574,4 @@ Package dependencies
 [#778]: https://github.com/JuliaStats/MixedModels.jl/issues/778
 [#783]: https://github.com/JuliaStats/MixedModels.jl/issues/783
 [#785]: https://github.com/JuliaStats/MixedModels.jl/issues/785
+[#791]: https://github.com/JuliaStats/MixedModels.jl/issues/791
diff --git a/Project.toml b/Project.toml
index a08cd0c5f..42348b1eb 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "MixedModels"
 uuid = "ff71e718-51f3-5ec2-a782-8ffcbfa3c316"
 author = ["Phillip Alday ", "Douglas Bates ", "Jose Bayoan Santiago Calderon "]
-version = "4.26.1"
+version = "4.27.0"
 
 [deps]
 Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
diff --git a/src/generalizedlinearmixedmodel.jl b/src/generalizedlinearmixedmodel.jl
index cfc06a778..8cf23aedd 100644
--- a/src/generalizedlinearmixedmodel.jl
+++ b/src/generalizedlinearmixedmodel.jl
@@ -767,7 +767,6 @@ function stderror!(v::AbstractVector{T}, m::GeneralizedLinearMixedModel{T}) wher
 end
 
 function unfit!(model::GeneralizedLinearMixedModel{T}) where {T}
-    deviance!(model, 1)
     reevaluateAend!(model.LMM)
 
     reterms = model.LMM.reterms
@@ -775,11 +774,14 @@ function unfit!(model::GeneralizedLinearMixedModel{T}) where {T}
     # we need to reset optsum so that it
     # plays nice with the modifications fit!() does
     optsum.lowerbd = mapfoldl(lowerbd, vcat, reterms)
-    optsum.initial = mapfoldl(getθ, vcat, reterms)
+    # for variances (bounded at zero), we have ones, while
+    # for everything else (bounded at -Inf), we have zeros
+    optsum.initial = map(T ∘ iszero, optsum.lowerbd)
     optsum.final = copy(optsum.initial)
     optsum.xtol_abs = fill!(copy(optsum.initial), 1.0e-10)
     optsum.initial_step = T[]
     optsum.feval = -1
+    deviance!(model, 1)
     return model
 end
 
diff --git a/src/optsummary.jl b/src/optsummary.jl
index 2ee69144c..864f09cd6 100644
--- a/src/optsummary.jl
+++ b/src/optsummary.jl
@@ -162,3 +162,9 @@ function _check_nlopt_return(ret, failure_modes=_NLOPT_FAILURE_MODES)
         @warn("NLopt optimization failure: $ret")
     end
 end
+
+function Base.:(==)(o1::OptSummary{T}, o2::OptSummary{T}) where {T}
+    return all(fieldnames(OptSummary)) do fn
+        return getfield(o1, fn) == getfield(o2, fn)
+    end
+end
diff --git a/src/serialization.jl b/src/serialization.jl
index 52ec1dafe..708504c18 100644
--- a/src/serialization.jl
+++ b/src/serialization.jl
@@ -1,15 +1,78 @@
 """
-    restoreoptsum!(m::LinearMixedModel, io::IO; atol::Real=0, rtol::Real=atol>0 ? 0 : √eps)
-    restoreoptsum!(m::LinearMixedModel, filename; atol::Real=0, rtol::Real=atol>0 ? 0 : √eps)
+    restoreoptsum!(m::MixedModel, io::IO; atol::Real=0, rtol::Real=atol>0 ? 0 : √eps)
+    restoreoptsum!(m::MixedModel, filename; atol::Real=0, rtol::Real=atol>0 ? 0 : √eps)
 
 Read, check, and restore the `optsum` field from a JSON stream or filename.
 """
+function restoreoptsum!(m::MixedModel, filename; kwargs...)
+    return open(filename, "r") do io
+        return restoreoptsum!(m, io; kwargs...)
+    end
+end
+
 function restoreoptsum!(
     m::LinearMixedModel{T}, io::IO; atol::Real=zero(T),
     rtol::Real=atol > 0 ? zero(T) : √eps(T),
+) where {T}
+    dict = JSON3.read(io)
+    ops = restoreoptsum!(m.optsum, dict)
+    for (par, obj_at_par) in (:initial => :finitial, :final => :fmin)
+        if !isapprox(
+            objective(updateL!(setθ!(m, getfield(ops, par)))), getfield(ops, obj_at_par);
+            rtol, atol,
+        )
+            throw(
+                ArgumentError(
+                    "model at $par does not match stored $obj_at_par within atol=$atol, rtol=$rtol"
+                ),
+            )
+        end
+    end
+    return m
+end
+
+function restoreoptsum!(
+    m::GeneralizedLinearMixedModel{T}, io::IO; atol::Real=zero(T),
+    rtol::Real=atol > 0 ? zero(T) : √eps(T),
 ) where {T}
     dict = JSON3.read(io)
     ops = m.optsum
+
+    # need to accommodate fast and slow fits
+    resize!(ops.initial, length(dict.initial))
+    resize!(ops.final, length(dict.final))
+
+    theta_beta_len = length(m.θ) + length(m.β)
+    if length(dict.initial) == theta_beta_len # fast=false
+        if length(ops.lowerbd) == length(m.θ)
+            prepend!(ops.lowerbd, fill(-Inf, length(m.β)))
+        end
+        setpar! = setβθ!
+        varyβ = false
+    else # fast=true
+        setpar! = setθ!
+        varyβ = true
+        if length(ops.lowerbd) != length(m.θ)
+            deleteat!(ops.lowerbd, 1:length(m.β))
+        end
+    end
+    restoreoptsum!(ops, dict)
+    for (par, obj_at_par) in (:initial => :finitial, :final => :fmin)
+        if !isapprox(
+            deviance(pirls!(setpar!(m, getfield(ops, par)), varyβ), dict.nAGQ),
+            getfield(ops, obj_at_par); rtol, atol,
+        )
+            throw(
+                ArgumentError(
+                    "model at $par does not match stored $obj_at_par within atol=$atol, rtol=$rtol"
+                ),
+            )
+        end
+    end
+    return m
+end
+
+function restoreoptsum!(ops::OptSummary{T}, dict::AbstractDict) where {T}
     allowed_missing = (
         :lowerbd, # never saved, -Inf not allowed in JSON
         :xtol_zero_abs, # added in v4.25.0
@@ -27,7 +90,9 @@ function restoreoptsum!(
     if length(setdiff(allowed_missing, keys(dict))) > 1 # 1 because :lowerbd
         @warn "optsum was saved with an older version of MixedModels.jl: consider resaving."
     end
+
     if any(ops.lowerbd .> dict.initial) || any(ops.lowerbd .> dict.final)
+        @debug "" ops.lowerbd dict.initial dict.final
         throw(ArgumentError("initial or final parameters in io do not satisfy lowerbd"))
     end
     for fld in (:feval, :finitial, :fmin, :ftol_rel, :ftol_abs, :maxfeval, :nAGQ, :REML)
@@ -37,13 +102,6 @@ function restoreoptsum!(
     ops.xtol_rel = copy(dict.xtol_rel)
     copyto!(ops.initial, dict.initial)
     copyto!(ops.final, dict.final)
-    for (v, f) in (:initial => :finitial, :final => :fmin)
-        if !isapprox(
-            objective(updateL!(setθ!(m, getfield(ops, v)))), getfield(ops, f); rtol, atol
-        )
-            throw(ArgumentError("model m at $v does not give stored $f"))
-        end
-    end
     ops.optimizer = Symbol(dict.optimizer)
     ops.returnvalue = Symbol(dict.returnvalue)
     # compatibility with fits saved before the introduction of various extensions
@@ -59,30 +117,23 @@
     else
         [(convert(Vector{T}, first(entry)), T(last(entry))) for entry in fitlog]
     end
-    return m
-end
-
-function restoreoptsum!(m::LinearMixedModel{T}, filename; kwargs...) where {T}
-    open(filename, "r") do io
-        restoreoptsum!(m, io; kwargs...)
-    end
+    return ops
 end
 
 """
-    saveoptsum(io::IO, m::LinearMixedModel)
-    saveoptsum(filename, m::LinearMixedModel)
+    saveoptsum(io::IO, m::MixedModel)
+    saveoptsum(filename, m::MixedModel)
 
 Save `m.optsum` (w/o the `lowerbd` field) in JSON format to an IO stream or a file
 
 The reason for omitting the `lowerbd` field is because it often contains `-Inf` values
 that are not allowed in JSON.
 """
-saveoptsum(io::IO, m::LinearMixedModel) = JSON3.write(io, m.optsum)
-function saveoptsum(filename, m::LinearMixedModel)
+saveoptsum(io::IO, m::MixedModel) = JSON3.write(io, m.optsum)
+function saveoptsum(filename, m::MixedModel)
     open(filename, "w") do io
         saveoptsum(io, m)
     end
 end
 
-# TODO: write methods for GLMM
 # TODO, maybe: something nice for the MixedModelBootstrap
diff --git a/test/pirls.jl b/test/pirls.jl
index 96a005033..6b31245fb 100644
--- a/test/pirls.jl
+++ b/test/pirls.jl
@@ -239,3 +239,29 @@ end
     @test isapprox(first(gm5.β), -0.13860166843315044, atol=1.e-3)
     @test isapprox(last(gm5.β), -0.034414458364713504, atol=1.e-3)
 end
+
+@testset "GLMM saveoptsum" begin
+    cbpp = dataset(:cbpp)
+    gm_original = GeneralizedLinearMixedModel(first(gfms[:cbpp]), cbpp, Binomial(); wts=cbpp.hsz)
+    gm_restored = GeneralizedLinearMixedModel(first(gfms[:cbpp]), cbpp, Binomial(); wts=cbpp.hsz)
+    fit!(gm_original; progress=false, nAGQ=1)
+
+    io = IOBuffer()
+
+    saveoptsum(seekstart(io), gm_original)
+    restoreoptsum!(gm_restored, seekstart(io))
+    @test gm_original.optsum == gm_restored.optsum
+    @test deviance(gm_original) ≈ deviance(gm_restored)
+
+    refit!(gm_original; progress=false, nAGQ=3)
+    saveoptsum(seekstart(io), gm_original)
+    restoreoptsum!(gm_restored, seekstart(io))
+    @test gm_original.optsum == gm_restored.optsum
+    @test deviance(gm_original) ≈ deviance(gm_restored)
+
+    refit!(gm_original; progress=false, fast=true)
+    saveoptsum(seekstart(io), gm_original)
+    restoreoptsum!(gm_restored, seekstart(io))
+    @test gm_original.optsum == gm_restored.optsum
+    @test deviance(gm_original) ≈ deviance(gm_restored)
+end
diff --git a/test/pls.jl b/test/pls.jl
index cc84b296e..98d239ec3 100644
--- a/test/pls.jl
+++ b/test/pls.jl
@@ -530,8 +530,8 @@ end
     fm_mod = deepcopy(fm)
     fm_mod.optsum.fmin += 1
     saveoptsum(seekstart(io), fm_mod)
-    @test_throws(ArgumentError("model m at final does not give stored fmin"),
-                 restoreoptsum!(m, seekstart(io)))
+    @test_throws(ArgumentError("model at final does not match stored fmin within atol=0.0, rtol=1.0e-8"),
+                 restoreoptsum!(m, seekstart(io); atol=0.0, rtol=1e-8))
     restoreoptsum!(m, seekstart(io); atol=1)
     @test m.optsum.fmin - fm.optsum.fmin ≈ 1
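
Usage sketch (not part of the patch): the "GLMM saveoptsum" test set above exercises the new methods through an in-memory `IOBuffer`; the snippet below shows the same round trip with a file. It is a minimal illustration only — the formula is an assumption standing in for the `gfms[:cbpp]` test fixture, and the file name is arbitrary.

    using MixedModels
    cbpp = MixedModels.dataset(:cbpp)
    # assumed formula for the cbpp example; the tests pull theirs from gfms[:cbpp]
    form = @formula((incid / hsz) ~ 1 + period + (1 | herd))
    gm = GeneralizedLinearMixedModel(form, cbpp, Binomial(); wts=cbpp.hsz)
    fit!(gm; progress=false, nAGQ=1)
    saveoptsum("cbpp_optsum.json", gm)       # filename method now accepts any MixedModel

    gm2 = GeneralizedLinearMixedModel(form, cbpp, Binomial(); wts=cbpp.hsz)
    restoreoptsum!(gm2, "cbpp_optsum.json")  # reruns PIRLS and checks finitial/fmin
    deviance(gm) ≈ deviance(gm2)             # should hold after a successful restore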