diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..2c63c085 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,2 @@ +{ +} diff --git a/clusters_clt.tex b/clusters_clt.tex new file mode 100644 index 00000000..bab96e09 --- /dev/null +++ b/clusters_clt.tex @@ -0,0 +1,24 @@ +\setlength{\LTpost}{0mm} +\begin{longtable}{l|rrrrrrrr} +\toprule +\multicolumn{1}{l}{} & \multicolumn{2}{c}{\emph{G} = 50} & \multicolumn{2}{c}{\emph{G} = 100} & \multicolumn{2}{c}{\emph{G} = 150} & \multicolumn{2}{c}{\emph{G} = 200} \\ +\cmidrule(lr){2-3} \cmidrule(lr){4-5} \cmidrule(lr){6-7} \cmidrule(lr){8-9} +\multicolumn{1}{l}{} & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\bar{\sigma}_{n}}\) & \(\frac{\sqrt{G}\bar{X}_{n}}{\hat{\sigma}_{n}}\) \\ +\midrule\addlinespace[2.5pt] +\multicolumn{9}{l}{\(\alpha=1.5, \beta=1.9\)} \\ +\midrule\addlinespace[2.5pt] +10\% & $0.03$ & $0.06$ & $0.04$ & $0.03$ & $0.03$ & $0.04$ & $0.05$ & $0.06$ \\ +5\% & $0.02$ & $0.03$ & $0.02$ & $0.01$ & $0.00$ & $0.01$ & $0.04$ & $0.06$ \\ +1\% & $0.01$ & $0.00$ & $0.01$ & $0.00$ & $0.00$ & $0.01$ & $0.01$ & $0.02$ \\ +\midrule\addlinespace[2.5pt] +\multicolumn{9}{l}{\(\alpha=1.5, \beta=2.1\)} \\ +\midrule\addlinespace[2.5pt] +10\% & $0.05$ & $0.03$ & $0.03$ & $0.08$ & $0.03$ & $0.03$ & $0.02$ & $0.08$ \\ +5\% & $0.02$ & $0.02$ & $0.01$ & $0.05$ & $0.02$ & $0.01$ & $0.02$ & $0.05$ \\ +1\% & $0.01$ & $0.00$ & $0.00$ & $0.02$ & $0.00$ & $0.00$ & $0.00$ & $0.01$ \\ +\bottomrule +\end{longtable} +\begin{minipage}{\linewidth} +The table shows the rejection rates for \(G=\{50,100,150,200\}\) and \(\alpha=1.5\) and \(\beta=1.9\) and \(\beta=2.1\). The Monte Carlo is based on 10,000 simulations. 
The simulation standard errors are: 0.009 for \(\alpha=10\%\), 0.007 for \(\alpha=5\%\), and 0.0031 for \(\alpha=1\%\).\\ +\end{minipage} + diff --git a/docs/Project.toml b/docs/Project.toml index a6e35558..36455e8a 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,15 +1,20 @@ [deps] +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a" Optim = "429524aa-4258-5aef-a3af-852621145aeb" +RCall = "6f49c342-dc21-5d91-9882-a32aef131414" RDatasets = "ce6b1742-4840-55fa-b093-852dadbb1d8b" +Revise = "295af30f-e4ad-537b-8983-00126c2a3abe" StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" +StatsFuns = "4c63d2b9-4356-54db-8cca-17b64c39e42c" StatsModels = "3eaba693-59b7-5ba5-a881-562e759f1c8d" [compat] DataFrames = "1" Documenter = "1" -Optim = "1.6.2" \ No newline at end of file +Optim = "1.6.2" diff --git a/docs/src/api.md b/docs/src/api.md index 89765067..99ff2a24 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -2,7 +2,7 @@ ```@meta DocTestSetup = quote - using CategoricalArrays, DataFrames, Distributions, GLM, RDatasets + using CategoricalArrays, DataFrames, Distributions, GLM, RDatasets, StableRNGs end ``` diff --git a/docs/src/examples.md b/docs/src/examples.md index 1711ff7d..a2965282 100644 --- a/docs/src/examples.md +++ b/docs/src/examples.md @@ -61,7 +61,7 @@ julia> dof(ols) 3 julia> dof_residual(ols) -1.0 +1 julia> round(aic(ols); digits=5) 5.84252 @@ -214,8 +214,8 @@ sales ^ 2 -6.94594e-9 3.72614e-9 -1.86 0.0725 -1.45667e-8 6.7487e-10 ```jldoctest julia> data = DataFrame(X=[1,2,2], Y=[1,0,1]) 3×2 DataFrame - Row │ X Y - │ Int64 Int64 + Row │ X Y + │ Int64 Int64 ─────┼────────────── 1 │ 1 1 2 │ 2 0 @@ -319,8 +319,8 @@ julia> using GLM, RDatasets 
julia> form = dataset("datasets", "Formaldehyde") 6×2 DataFrame - Row │ Carb OptDen - │ Float64 Float64 + Row │ Carb OptDen + │ Float64 Float64 ─────┼────────────────── 1 │ 0.1 0.086 2 │ 0.3 0.269 @@ -473,8 +473,8 @@ julia> dobson = DataFrame(Counts = [18.,17,15,20,10,21,25,13,13], Outcome = categorical([1,2,3,1,2,3,1,2,3]), Treatment = categorical([1,1,1,2,2,2,3,3,3])) 9×3 DataFrame - Row │ Counts Outcome Treatment - │ Float64 Cat… Cat… + Row │ Counts Outcome Treatment + │ Float64 Cat… Cat… ─────┼───────────────────────────── 1 │ 18.0 1 1 2 │ 17.0 2 1 @@ -510,32 +510,11 @@ julia> round(deviance(gm1), digits=5) In this example, we choose the best model from a set of λs, based on minimum BIC. -```jldoctest +```jldoctest; filter = r"(\d*)\.(\d{7})\d+" => s"\1.\2***" julia> using GLM, RDatasets, StatsBase, DataFrames, Optim -julia> trees = DataFrame(dataset("datasets", "trees")) -31×3 DataFrame - Row │ Girth Height Volume - │ Float64 Int64 Float64 -─────┼────────────────────────── - 1 │ 8.3 70 10.3 - 2 │ 8.6 65 10.3 - 3 │ 8.8 63 10.2 - 4 │ 10.5 72 16.4 - 5 │ 10.7 81 18.8 - 6 │ 10.8 83 19.7 - 7 │ 11.0 66 15.6 - 8 │ 11.0 75 18.2 - ⋮ │ ⋮ ⋮ ⋮ - 25 │ 16.3 77 42.6 - 26 │ 17.3 81 55.4 - 27 │ 17.5 82 55.7 - 28 │ 17.9 80 58.3 - 29 │ 18.0 80 51.5 - 30 │ 18.0 80 51.0 - 31 │ 20.6 87 77.0 - 16 rows omitted - +julia> trees = DataFrame(dataset("datasets", "trees")); + julia> bic_glm(λ) = bic(glm(@formula(Volume ~ Height + Girth), trees, Normal(), PowerLink(λ))); julia> optimal_bic = optimize(bic_glm, -1.0, 1.0); @@ -554,9 +533,9 @@ Coefficients: ──────────────────────────────────────────────────────────────────────────── (Intercept) -1.07586 0.352543 -3.05 0.0023 -1.76684 -0.384892 Height 0.0232172 0.00523331 4.44 <1e-05 0.0129601 0.0334743 -Girth 0.242837 0.00922555 26.32 <1e-99 0.224756 0.260919 +Girth 0.242837 0.00922556 26.32 <1e-99 0.224756 0.260919 ──────────────────────────────────────────────────────────────────────────── julia> round(optimal_bic.minimum, digits=5) 156.37638 
-``` \ No newline at end of file +``` diff --git a/docs/src/index.md b/docs/src/index.md index 65fd104f..b111b1f2 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -123,6 +123,110 @@ x: 4 -0.032673 0.0797865 -0.41 0.6831 -0.191048 0.125702 ─────────────────────────────────────────────────────────────────────────── ``` +## Weighting + +Both `lm` and `glm` allow weighted estimation. The four different +[types of weights](https://juliastats.org/StatsBase.jl/stable/weights/) defined in +[StatsBase.jl](https://github.com/JuliaStats/StatsBase.jl) can be used to fit a model: + +- `AnalyticWeights` describe a non-random relative importance (usually between 0 and 1) for + each observation. These weights may also be referred to as reliability weights, precision + weights or inverse variance weights. These are typically used when the observations being + weighted are aggregate values (e.g., averages) with differing variances. +- `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + These weights may also be referred to as case weights or repeat weights. +- `ProbabilityWeights` represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. + These weights may also be referred to as sampling weights. +- `UnitWeights` attribute a weight of 1 to each observation, which corresponds + to unweighted regression (the default). + +To indicate which kind of weights should be used, the vector of weights must be wrapped in +one of the three weights types, and then passed to the `weights` keyword argument. +Short-hand functions `aweights`, `fweights`, and `pweights` can be used to construct +`AnalyticWeights`, `FrequencyWeights`, and `ProbabilityWeights`, respectively. + +We illustrate the API with randomly generated data. 
+ +```jldoctest weights +julia> using StableRNGs, DataFrames, GLM + +julia> data = DataFrame(y = rand(StableRNG(1), 100), x = randn(StableRNG(2), 100), weights = repeat([1, 2, 3, 4], 25)); + +julia> m = lm(@formula(y ~ x), data) +LinearModel + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.517369 0.0280232 18.46 <1e-32 0.461758 0.57298 +x -0.0500249 0.0307201 -1.63 0.1066 -0.110988 0.0109382 +────────────────────────────────────────────────────────────────────────── + +julia> m_aweights = lm(@formula(y ~ x), data, wts=aweights(data.weights)) +LinearModel + +y ~ 1 + x + +Coefficients: +────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0270707 19.09 <1e-34 0.463009 0.570451 +x -0.0478667 0.0308395 -1.55 0.1239 -0.109067 0.0133333 +────────────────────────────────────────────────────────────────────────── + +julia> m_fweights = lm(@formula(y ~ x), data, wts=fweights(data.weights)) +LinearModel + +y ~ 1 + x + +Coefficients: +───────────────────────────────────────────────────────────────────────────── + Coef. Std. Error t Pr(>|t|) Lower 95% Upper 95% +───────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0170172 30.37 <1e-84 0.483213 0.550246 +x -0.0478667 0.0193863 -2.47 0.0142 -0.0860494 -0.00968394 +───────────────────────────────────────────────────────────────────────────── + +julia> m_pweights = lm(@formula(y ~ x), data, wts=pweights(data.weights)) +LinearModel + +y ~ 1 + x + +Coefficients: +─────────────────────────────────────────────────────────────────────────── + Coef. Std. 
Error t Pr(>|t|) Lower 95% Upper 95% +─────────────────────────────────────────────────────────────────────────── +(Intercept) 0.51673 0.0287193 17.99 <1e-32 0.459737 0.573722 +x -0.0478667 0.0265532 -1.80 0.0745 -0.100561 0.00482739 +─────────────────────────────────────────────────────────────────────────── + +``` + +!!! warning + + In the old API, weights were passed as `AbstractVectors` and were silently treated in + the internal computation of standard errors and related quantities as `FrequencyWeights`. + Passing weights as `AbstractVector` is still allowed for backward compatibility, but it + is deprecated. When weights are passed following the old API, they are now coerced to + `FrequencyWeights` and a deprecation warning is issued. + +The type of the weights will affect the variance of the estimated coefficients and the +quantities involving this variance. The coefficient point estimates will be the same +regardless of the type of weights. + +```jldoctest weights +julia> loglikelihood(m_aweights) +-16.296307561384253 + +julia> loglikelihood(m_fweights) +-25.51860961756451 +``` + ## Comparing models with F-test Comparisons between two or more linear models can be performed using the `ftest` function, @@ -176,8 +280,8 @@ Many of the methods provided by this package have names similar to those in [R]( - `vcov`: variance-covariance matrix of the coefficient estimates -Note that the canonical link for negative binomial regression is `NegativeBinomialLink`, but -in practice one typically uses `LogLink`. +Note that the canonical link for negative binomial regression is `NegativeBinomialLink`, +but in practice one typically uses `LogLink`. 
```jldoctest methods julia> using GLM, DataFrames, StatsBase @@ -209,7 +313,9 @@ julia> round.(predict(mdl, test_data); digits=8) 9.33333333 ``` -The [`cooksdistance`](@ref) method computes [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation used to fit a linear model, giving an estimate of the influence of each data point. +The [`cooksdistance`](@ref) method computes +[Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation +used to fit a linear model, giving an estimate of the influence of each data point. Note that it's currently only implemented for linear models without weights. ```jldoctest methods diff --git a/src/GLM.jl b/src/GLM.jl index 59c327db..c22de932 100644 --- a/src/GLM.jl +++ b/src/GLM.jl @@ -12,17 +12,18 @@ module GLM import Statistics: cor using StatsAPI import StatsBase: coef, coeftable, coefnames, confint, deviance, nulldeviance, dof, dof_residual, - loglikelihood, nullloglikelihood, nobs, stderror, vcov, - residuals, predict, predict!, - fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue + loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, predict!, + fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², + PValue, weights, leverage import StatsFuns: xlogy import SpecialFunctions: erfc, erfcinv, digamma, trigamma import StatsModels: hasintercept import Tables export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual, - loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, + loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, predict!, fitted, fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr², - cooksdistance, hasintercept, dispersion, vif, gvif, termnames + cooksdistance, hasintercept, dispersion, vif, gvif, termnames, weights, AnalyticWeights, + ProbabilityWeights, FrequencyWeights, UnitWeights, uweights, fweights, 
pweights, aweights, leverage export # types @@ -109,13 +110,15 @@ module GLM If `method=:cholesky` (the default), then the `Cholesky` decomposition method will be used. If `method=:qr`, then the `QR` decomposition method (which is more stable but slower) will be used. - - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations. - Such weights are equivalent to repeating each observation a number of times equal - to its weight. Do note that this interpretation gives equal point estimates but - different standard errors from analytical (a.k.a. inverse variance) weights and - from probability (a.k.a. sampling) weights which are the default in some other - software. - Can be length 0 to indicate no weighting (default). + - `wts::AbstractWeights`: Weights of observations. + The weights can be of type `AnalyticWeights`, `FrequencyWeights`, + `ProbabilityWeights`, or `UnitWeights`. `AnalyticWeights` describe a non-random + relative importance (usually between 0 and 1) for each observation. These weights may + also be referred to as reliability weights, precision weights or inverse variance weights. + `FrequencyWeights` describe the number of times (or frequency) each observation was seen. + `ProbabilityWeights` represent the inverse of the sampling probability for each observation, + providing a correction mechanism for under- or over-sampling certain population groups. `UnitWeights` + (default) describe the case in which all weights are equal to 1 (so no weighting takes place). - `contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()`: a `Dict` mapping term names (as `Symbol`s) to term types (e.g. 
`ContinuousTerm`) or contrasts (e.g., `HelmertCoding()`, `SeqDiffCoding(; levels=["a", "b", "c"])`, diff --git a/src/ftest.jl b/src/ftest.jl index 97ce89da..d398a5e8 100644 --- a/src/ftest.jl +++ b/src/ftest.jl @@ -57,7 +57,10 @@ F-statistic: 241.62 on 12 observations and 1 degrees of freedom, p-value: <1e-07 """ function ftest(mod::LinearModel) hasintercept(mod) || throw(ArgumentError("ftest only works for models with an intercept")) - + wts = weights(mod) + if wts isa ProbabilityWeights + throw(ArgumentError("`ftest` for probability weighted models is not currently supported.")) + end rss = deviance(mod) tss = nulldeviance(mod) @@ -228,3 +231,7 @@ function show(io::IO, ftr::FTestResult{N}) where N end print(io, '─'^totwidth) end + +function ftest(r::LinearModel{T,<:ProbabilityWeights}) where {T} + throw(ArgumentError("`ftest` for probability weighted models is not currently supported.")) +end diff --git a/src/glmfit.jl b/src/glmfit.jl index 9bdf817f..358adc68 100644 --- a/src/glmfit.jl +++ b/src/glmfit.jl @@ -3,7 +3,7 @@ The response vector and various derived vectors in a generalized linear model. """ -struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp +struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link,W<:AbstractWeights} <: ModResp "`y`: response vector" y::V d::D @@ -17,15 +17,15 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp mu::V "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0" offset::V - "`wts:` prior case weights. Can be of length 0." - wts::V + "`wts`: prior case weights. Can be of length 0." 
+ wts::W "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm" wrkwt::V "`wrkresid`: working residuals for IRLS" wrkresid::V end -function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L} +function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector, D, L, W} n = length(y) nη = length(η) nμ = length(μ) @@ -35,42 +35,49 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec # Check y values checky(y, d) + ## We don't support custom types of weights that a user may define + if !(wts isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights, UnitWeights}) + throw(ArgumentError("The type of `wts` was $W. The supported weights types are " * + "`FrequencyWeights`, `AnalyticWeights`, `ProbabilityWeights` and `UnitWeights`.")) + end + # Lengths of y, η, and η all need to be n if !(nη == nμ == n) throw(DimensionMismatch("lengths of η, μ, and y ($nη, $nμ, $n) are not equal")) end # Lengths of wts and off can be either n or 0 - if lw != 0 && lw != n - throw(DimensionMismatch("wts must have length $n or length 0 but was $lw")) + if lw != n + throw(DimensionMismatch("wts must have length $n but was $lw")) end if lo != 0 && lo != n throw(DimensionMismatch("offset must have length $n or length 0 but was $lo")) end - return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) + return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y)) end -function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector) +function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) _off = convert(Vector{float(eltype(off))}, off) - _wts = convert(Vector{float(eltype(wts))}, wts) η = similar(_y) μ = similar(_y) - r = GlmResp(_y, d, l, η, μ, _off, _wts) - 
initialeta!(r.eta, d, l, _y, _wts, _off) + r = GlmResp(_y, d, l, η, μ, _off, wts) + initialeta!(r.eta, d, l, _y, wts, _off) updateμ!(r, r.eta) return r end -function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, - wts::AbstractVector{<:Real}) where {D, L} - GlmResp(float(y), d, l, float(off), float(wts)) -end +GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real}, + wts::AbstractWeights) where {D, L} = + GlmResp(float(y), d, l, float(off), wts) deviance(r::GlmResp) = sum(r.devresid) +weights(r::GlmResp) = r.wts +isweighted(r::GlmResp) = weights(r) isa Union{AnalyticWeights, FrequencyWeights, ProbabilityWeights} + """ cancancel(r::GlmResp{V,D,L}) @@ -94,14 +101,14 @@ the linear predictor, `linPr`. """ function updateμ! end -function updateμ!(r::GlmResp{T}, linPr::T) where T<:FPVector +function updateμ!(r::GlmResp{T,D,L,<:AbstractWeights}, linPr::T) where {T<:FPVector,D,L} isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset) updateμ!(r) - if !isempty(r.wts) + if isweighted(r) map!(*, r.devresid, r.devresid, r.wts) map!(*, r.wrkwt, r.wrkwt, r.wts) end - r + return r end function updateμ!(r::GlmResp{V,D,L}) where {V<:FPVector,D,L} @@ -262,14 +269,14 @@ deviance(m::AbstractGLM) = deviance(m.rr) function nulldeviance(m::GeneralizedLinearModel) r = m.rr - wts = weights(r.wts) + wts = weights(r) y = r.y d = r.d offset = r.offset hasint = hasintercept(m) dev = zero(eltype(y)) if isempty(offset) # Faster method - if !isempty(wts) + if isweighted(m) mu = hasint ? 
mean(y, wts) : linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1) @@ -295,47 +302,53 @@ function nulldeviance(m::GeneralizedLinearModel) return dev end -function loglikelihood(m::AbstractGLM) - r = m.rr - wts = r.wts - y = r.y - mu = r.mu - d = r.d - ll = zero(eltype(mu)) - if !isempty(wts) - ϕ = deviance(m)/sum(wts) - @inbounds for i in eachindex(y, mu, wts) +loglikelihood(m::AbstractGLM) = loglikelihood(m.rr) + +function loglikelihood(r::GlmResp{T,D,L,<:AbstractWeights}) where {T,D,L} + y = r.y + mu = r.mu + wts = weights(r) + d = r.d + ll = zero(eltype(mu)) + n = nobs(r) + N = length(y) + δ = deviance(r) + ϕ = δ/n + if wts isa Union{FrequencyWeights, UnitWeights} + @inbounds for i in eachindex(y, mu) ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ) end - else - ϕ = deviance(m)/length(y) - @inbounds for i in eachindex(y, mu) - ll += loglik_obs(d, y[i], mu[i], 1, ϕ) + elseif wts isa AnalyticWeights + @inbounds for i in eachindex(y, mu, wts) + ll += loglik_apweights_obs(d, y[i], mu[i], wts[i], δ, wts.sum, N) end + else + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) end - ll + return ll end function nullloglikelihood(m::GeneralizedLinearModel) r = m.rr - wts = r.wts + wts = weights(m) + sumwt = sum(wts) y = r.y d = r.d offset = r.offset hasint = hasintercept(m) - ll = zero(eltype(y)) + ll = zero(eltype(y)) if isempty(r.offset) # Faster method - if !isempty(wts) - mu = hasint ? mean(y, weights(wts)) : linkinv(r.link, zero(ll)/1) - ϕ = nulldeviance(m)/sum(wts) + mu = hasint ? mean(y, wts) : linkinv(r.link, zero(ll)/1) + δ = nulldeviance(m) + ϕ = nulldeviance(m)/nobs(m) + N = length(y) + if wts isa Union{FrequencyWeights, UnitWeights} @inbounds for i in eachindex(y, wts) ll += loglik_obs(d, y[i], mu, wts[i], ϕ) end else - mu = hasint ? 
mean(y) : linkinv(r.link, zero(ll)/1) - ϕ = nulldeviance(m)/length(y) - @inbounds for i in eachindex(y) - ll += loglik_obs(d, y[i], mu, 1, ϕ) + @inbounds for i in eachindex(y, wts) + ll += loglik_apweights_obs(d, y[i], mu, wts[i], δ, sumwt, N) end end else @@ -368,7 +381,7 @@ function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real lp = r.mu # Initialize β, μ, and compute deviance - if start == nothing || isempty(start) + if start === nothing || isempty(start) # Compute beta update based on default response value # if no starting values have been passed delbeta!(p, wrkresp(r), r.wrkwt) @@ -469,7 +482,7 @@ end function StatsBase.fit!(m::AbstractGLM, y; - wts=nothing, + wts=uweights(length(y)), offset=nothing, verbose::Bool=false, maxiter::Integer=30, @@ -500,8 +513,7 @@ function StatsBase.fit!(m::AbstractGLM, r = m.rr V = typeof(r.y) - r.y = copy!(r.y, y) - isa(wts, Nothing) || copy!(r.wts, wts) + copy!(r.y, y) isa(offset, Nothing) || copy!(r.offset, offset) initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset) updateμ!(r, r.eta) @@ -563,26 +575,37 @@ function fit(::Type{M}, dropcollinear::Bool = true, method::Symbol = :cholesky, dofit::Union{Bool, Nothing} = nothing, - wts::AbstractVector{<:Real} = similar(y, 0), + wts::AbstractVector{<:Real} = uweights(length(y)), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) 
where {M<:AbstractGLM} + if dofit === nothing dofit = true else Base.depwarn("`dofit` argument to `fit` is deprecated", :fit) end - # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) end - rr = GlmResp(y, d, l, offset, wts) + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = if wts isa AbstractWeights + wts + elseif wts isa AbstractVector + Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`", :fit) + fweights(wts) + else + throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) + end + rr = GlmResp(y, d, l, offset, _wts) if method === :cholesky - res = M(rr, cholpred(X, dropcollinear), nothing, false) + res = M(rr, cholpred(X, dropcollinear, _wts), nothing, false) elseif method === :qr - res = M(rr, qrpred(X, dropcollinear), nothing, false) + res = M(rr, qrpred(X, dropcollinear, _wts), nothing, false) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -616,20 +639,30 @@ function fit(::Type{M}, end f, (y, X) = modelframe(f, data, contrasts, M) - + wts = wts === nothing ? uweights(length(y)) : wts + _wts = if wts isa AbstractWeights + wts + elseif wts isa AbstractVector + Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. 
Proceeding " * + "by coercing `wts` to `FrequencyWeights`", :fit) + fweights(wts) + else + throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights")) + end # Check that X and y have the same number of observations if size(X, 1) != size(y, 1) throw(DimensionMismatch("number of rows in X and y must match")) end off = offset === nothing ? similar(y, 0) : offset - wts = wts === nothing ? similar(y, 0) : wts - rr = GlmResp(y, d, l, off, wts) + + rr = GlmResp(y, d, l, off, _wts) if method === :cholesky - res = M(rr, cholpred(X, dropcollinear), f, false) + res = M(rr, cholpred(X, dropcollinear, _wts), f, false) elseif method === :qr - res = M(rr, qrpred(X, dropcollinear), f, false) + res = M(rr, qrpred(X, dropcollinear, _wts), f, false) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -794,27 +827,14 @@ function initialeta!(eta::AbstractVector, dist::UnivariateDistribution, link::Link, y::AbstractVector, - wts::AbstractVector, + wts::AbstractWeights, off::AbstractVector) n = length(y) - lw = length(wts) lo = length(off) - if lw == n - @inbounds @simd for i = eachindex(y, eta, wts) - μ = mustart(dist, y[i], wts[i]) - eta[i] = linkfun(link, μ) - end - elseif lw == 0 - @inbounds @simd for i = eachindex(y, eta) - μ = mustart(dist, y[i], 1) - eta[i] = linkfun(link, μ) - end - else - throw(ArgumentError("length of wts must be either $n or 0 but was $lw")) - end + _initialeta!(eta, dist, link, y, wts) if lo == n @inbounds @simd for i = eachindex(eta, off) @@ -827,6 +847,20 @@ function initialeta!(eta::AbstractVector, return eta end +function _initialeta!(eta, dist, link, y, wts::AbstractWeights) + if wts isa UnitWeights + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], 1) + eta[i] = linkfun(link, μ) + end + else + @inbounds @simd for i in eachindex(y, eta) + μ = mustart(dist, y[i], wts[i]) + eta[i] = linkfun(link, μ) + end + end +end + # Helper function to check 
that the values of y are in the allowed domain function checky(y, d::Distribution) if any(x -> !insupport(d, x), y) @@ -840,3 +874,48 @@ function checky(y, d::Binomial) end return nothing end + +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) +nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} = sum(r.wts) + +function residuals(r::GlmResp; weighted::Bool=false) + y, η, μ = r.y, r.eta, r.mu + dres = similar(μ) + + @inbounds for i in eachindex(y, μ) + μi = μ[i] + yi = y[i] + dres[i] = sqrt(max(0, devresid(r.d, yi, μi))) * sign(yi-μi) + end + + if weighted + dres .*= sqrt.(r.wts) + end + + return dres +end + +function momentmatrix(m::GeneralizedLinearModel) + X = modelmatrix(m; weighted=false) + r, d = varstruct(m) + return Diagonal(r.*d)*X +end + +function varstruct(x::GeneralizedLinearModel) + wrkwt = working_weights(x) + wrkres = working_residuals(x) + r = wrkwt .* wrkres + if x.rr.d isa Union{Gamma, Geometric, InverseGaussian} + r, sum(wrkwt)/sum(abs2, r) + else + r, 1.0 + end +end + +function invloglikhessian(m::GeneralizedLinearModel) + r, d = varstruct(m) + return invfact(m.pp)/d +end + +invfact(f::DensePredChol) = invchol(f) +invfact(f::DensePredQR) = invqr(f) diff --git a/src/glmtools.jl b/src/glmtools.jl index 544cb1ee..2314d4b6 100644 --- a/src/glmtools.jl +++ b/src/glmtools.jl @@ -541,3 +541,33 @@ loglik_obs(::Poisson, y, μ, wt, ϕ) = wt*logpdf(Poisson(μ), y) # Γ(θ+y) / (y! 
* Γ(θ)) * p^θ(1-p)^y # Hence, p = θ/(μ+θ) loglik_obs(d::NegativeBinomial, y, μ, wt, ϕ) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) + + +## Slight different interface for analytic and probability weights +## ϕ is the deviance - not the deviance/n nor sum(wt) +## sumwt is sum(wt) +## n is the number of observations +loglik_apweights_obs(::Bernoulli, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) +loglik_apweights_obs(::Binomial, y, μ, wt, ϕ, sumwt, n) = logpdf(Binomial(round(Int, wt), μ), round(wt*y)) +loglik_apweights_obs(::Gamma, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Gamma(inv(ϕ/sumwt), μ*ϕ/sumwt), y) +loglik_apweights_obs(::Geometric, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Geometric(1 / (μ + 1)), y) +loglik_apweights_obs(::InverseGaussian, y, μ, wt, ϕ, sumwt, n) = -(wt*(1 + log(2π*(ϕ/sumwt))) + 3*log(y)*wt)/2 +loglik_apweights_obs(::Normal, y, μ, wt, ϕ, sumwt, n) = ((-log(2π*ϕ/n) - 1) + log(wt))/2 +loglik_apweights_obs(::Poisson, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(Poisson(μ), y) +loglik_apweights_obs(d::NegativeBinomial, y, μ, wt, ϕ, sumwt, n) = wt*logpdf(NegativeBinomial(d.r, d.r/(μ+d.r)), y) + +## Studentized Pearson residuals +# function pearson_residuals(m::GeneralizedLinearModel) +# r = m.rr +# wts = r.wts +# y, η, μ = r.y, r.eta, r.mu +# h = leverage(m) +# sqrt(dispersion(m)).*((y .- μ).*sqrt.(wts)) ./ sqrt.(dispersion(m).*glmvar.(r.d, μ)) +# end + +# function cooksdistance(m::GeneralizedLinearModel) +# h = leverage(m) +# hh = h./(1.0.-h).^2 +# Rp = pearson_residuals(m) +# (Rp.^2).*hh./(dispersion(m)^2*dof(m)) +# end diff --git a/src/linpred.jl b/src/linpred.jl index bb06fa17..d3299261 100644 --- a/src/linpred.jl +++ b/src/linpred.jl @@ -31,29 +31,37 @@ A `LinPred` type with a dense QR decomposition of `X` - `qr`: either a `QRCompactWY` or `QRPivoted` object created from `X`, with optional row weights. 
- `scratchm1`: scratch Matrix{T} of the same size as `X` """ -mutable struct DensePredQR{T<:BlasReal,Q<:Union{QRCompactWY, QRPivoted}} <: DensePred +mutable struct DensePredQR{T<:BlasReal, Q<:Union{QRCompactWY, QRPivoted}, W<:AbstractWeights} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base coefficient vector delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} qr::Q + wts::W scratchm1::Matrix{T} - function DensePredQR(X::AbstractMatrix, pivot::Bool=false) + function DensePredQR(X::AbstractMatrix, pivot::Bool, wts::W) where W<:Union{AbstractWeights} n, p = size(X) T = typeof(float(zero(eltype(X)))) Q = pivot ? QRPivoted : QRCompactWY fX = float(X) - cfX = fX === X ? copy(fX) : fX + if wts isa UnitWeights + cfX = fX === X ? copy(fX) : fX + else + cfX = Diagonal(sqrt.(wts))*fX + end F = pivot ? pivoted_qr!(cfX) : qr!(cfX) - new{T,Q}(Matrix{T}(X), + new{T,Q,W}(Matrix{T}(X), zeros(T, p), zeros(T, p), zeros(T, p), F, + wts, similar(X, T)) end end + +DensePredQR(X::AbstractMatrix) = DensePredQR(X, false, uweights(size(X,1))) """ delbeta!(p::LinPred, r::Vector) @@ -61,12 +69,16 @@ Evaluate and return `p.delbeta` the increment to the coefficient vector from res """ function delbeta! end -function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}) where T<:BlasReal - p.delbeta = p.qr \ r +function delbeta!(p::DensePredQR{T, <:QRCompactWY,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal + r̃ = p.wts isa UnitWeights ? 
r : (wtsqrt = sqrt.(p.wts); wtsqrt .*= r; wtsqrt) + #rnk = rank(p.qr.R) + #rnk == length(p.delbeta) || throw(RankDeficientException(rnk)) + p.delbeta = p.qr\r̃ + #mul!(p.scratchm1, Diagonal(ones(size(r))), p.X) return p end -function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T, <:QRCompactWY,<:AbstractWeights}, r::Vector{T}, wt::AbstractVector) where T<:BlasReal X = p.X wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) @@ -77,23 +89,23 @@ function delbeta!(p::DensePredQR{T,<:QRCompactWY}, r::Vector{T}, wt::Vector{T}) return p end -function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T,<:QRPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal + r̃ = p.wts isa UnitWeights ? r : (wtsqrt = sqrt.(p.wts); wtsqrt .*= r; wtsqrt) rnk = rank(p.qr.R) if rnk == length(p.delbeta) - p.delbeta = p.qr \ r + p.delbeta = p.qr \ r̃ else R = UpperTriangular(view(parent(p.qr.R), 1:rnk, 1:rnk)) piv = p.qr.p fill!(p.delbeta, 0) - p.delbeta[1:rnk] = R \ view(p.qr.Q'r, 1:rnk) + p.delbeta[1:rnk] = R \ view(p.qr.Q'*r̃, 1:rnk) invpermute!(p.delbeta, piv) end return p end -function delbeta!(p::DensePredQR{T,<:QRPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredQR{T,<:QRPivoted,<:AbstractWeights}, r::Vector{T}, wt::AbstractVector{T}) where T<:BlasReal X = p.X - W = Diagonal(wt) wtsqrt = sqrt.(wt) sqrtW = Diagonal(wtsqrt) mul!(p.scratchm1, sqrtW, X) @@ -125,32 +137,46 @@ A `LinPred` type with a dense Cholesky factorization of `X'X` - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights. 
- `scratchm1`: scratch Matrix{T} of the same size as `X` -- `scratchm2`: scratch Matrix{T} os the same size as `X'X` +- `scratchm2`: scratch Matrix{T} of the same size as `X'X` """ -mutable struct DensePredChol{T<:BlasReal,C} <: DensePred +mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractWeights} <: DensePred X::Matrix{T} # model matrix beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment scratchbeta::Vector{T} chol::C + wts::W scratchm1::Matrix{T} scratchm2::Matrix{T} end -function DensePredChol(X::AbstractMatrix, pivot::Bool) - F = Hermitian(float(X'X)) - T = eltype(F) + +function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) + if wts isa UnitWeights + F = Hermitian(float(X'X)) + T = eltype(F) + scr = similar(X, T) + else + T = float(promote_type(eltype(wts), eltype(X))) + scr = similar(X, T) + mul!(scr, Diagonal(wts), X) + F = Hermitian(float(scr'X)) + end F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F) DensePredChol(Matrix{T}(X), zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), F, - similar(X, T), + wts, + scr, similar(cholfactors(F))) end -cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot) -qrpred(X::AbstractMatrix, pivot::Bool=false) = DensePredQR(X, pivot) +DensePredChol(X::AbstractMatrix, pivot::Bool) = + DensePredChol(X, pivot, uweights(size(X,1))) + +cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights=uweights(size(X,1))) = DensePredChol(X, pivot, wts) +qrpred(X::AbstractMatrix, pivot::Bool=false, wts::AbstractWeights=uweights(size(X,1))) = DensePredQR(X, pivot, wts) cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol @@ -161,14 +187,16 @@ function cholesky(p::DensePredChol{T}) where T<:FP Cholesky(copy(cholfactors(c)), c.uplo, c.info) end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal - ldiv!(p.chol, mul!(p.delbeta, 
transpose(p.X), r)) +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal + X = p.wts isa UnitWeights ? p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) + ldiv!(p.chol, mul!(p.delbeta, transpose(X), r)) p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal ch = p.chol - delbeta = mul!(p.delbeta, adjoint(p.X), r) + X = p.wts isa UnitWeights ? p.scratchm1 .= p.X : mul!(p.scratchm1, Diagonal(p.wts), p.X) + delbeta = mul!(p.delbeta, adjoint(X), r) rnk = rank(ch) if rnk == length(delbeta) ldiv!(ch, delbeta) @@ -183,7 +211,7 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<: p end -function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal scr = mul!(p.scratchm1, Diagonal(wt), p.X) cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U)) mul!(p.delbeta, transpose(scr), r) @@ -191,7 +219,7 @@ function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) w p end -function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal +function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal piv = p.chol.p # inverse vector delbeta = p.delbeta # p.scratchm1 = WX @@ -223,37 +251,55 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vecto p end -mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred +mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights} <: GLM.LinPred X::M # model matrix Xt::M # X' beta0::Vector{T} # base vector for coefficients delbeta::Vector{T} # coefficient increment 
scratchbeta::Vector{T} chol::C - scratch::M + wts::W + scratchm1::M end -function SparsePredChol(X::SparseMatrixCSC{T}) where T + +function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T chol = cholesky(sparse(I, size(X, 2), size(X,2))) - return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X, + return SparsePredChol{eltype(X),typeof(X),typeof(chol), typeof(wts)}(X, X', zeros(T, size(X, 2)), zeros(T, size(X, 2)), zeros(T, size(X, 2)), chol, + wts, similar(X)) end -cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X) +cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights=uweights(size(X,1))) = + SparsePredChol(X, wts) + +function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} + scr = mul!(p.scratchm1, Diagonal(wt), p.X) + XtWX = p.Xt*scr + c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) +end -function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T - scr = mul!(p.scratch, Diagonal(wt), p.X) +function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C} + scr = mul!(p.scratchm1, Diagonal(wt.*p.wts), p.X) XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) end -function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T - scr = p.scratch = p.X +function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C} + scr = p.X + XtWX = p.Xt*scr + c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) + p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) +end + +function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C} + scr = p.scratchm1 .= p.X.*p.wts XtWX = p.Xt*scr c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L')) p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r) @@ -262,12 
+308,12 @@ end LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol) LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol -function invqr(p::DensePredQR{T,<: QRCompactWY}) where T +function invqr(p::DensePredQR{T,<: QRCompactWY, <:AbstractWeights}) where T Rinv = inv(p.qr.R) Rinv*Rinv' end -function invqr(p::DensePredQR{T,<: QRPivoted}) where T +function invqr(p::DensePredQR{T,<: QRPivoted, <:AbstractWeights}) where T rnk = rank(p.qr.R) k = length(p.delbeta) if rnk == k @@ -308,7 +354,47 @@ inverse(x::DensePred) = invchol(x) inverse(x::DensePredQR) = invqr(x) inverse(x::SparsePredChol) = invchol(x) -vcov(x::LinPredModel) = rmul!(inverse(x.pp), dispersion(x, true)) +working_residuals(x::LinPredModel) = x.rr.wrkresid +working_weights(x::LinPredModel) = x.rr.wrkwt + +function vcov(x::LinPredModel) + if weights(x) isa ProbabilityWeights + ## n-1 degrees of freedom - This is coherent with the `R` package `survey`, + ## `STATA` uses n-k + s = nobs(x)/(nobs(x) - 1) + mm = momentmatrix(x) + A = invloglikhessian(x) + _vcov(x.pp, mm, A).*s + else + rmul!(inverse(x.pp), dispersion(x, true)) + end +end + +function _vcov(pp::DensePred, Z::Matrix, A::Matrix) + if linpred_rank(pp) < size(Z, 2) + nancols = [all(isnan, col) for col in eachcol(A)] + nnancols = .!nancols + idx, nidx = findall(nancols), findall(nnancols) + Zv = view(Z, :, nidx) + B = Zv'Zv + Av = view(A, nidx, nidx) + V = similar(pp.scratchm1, (size(A)...)) + V[nidx, nidx] = Av * B * Av + V[idx, :] .= NaN + V[:, idx] .= NaN + else + B = Z'Z + V = A * B * A + end + return V +end + +function _vcov(pp::SparsePredChol, Z::Matrix, A::Matrix) + ## SparsePredChol does not handle rankdeficient cases + B = Z'*Z + V = A*B*A + return V +end function cor(x::LinPredModel) Σ = vcov(x) @@ -336,18 +422,64 @@ function modelframe(f::FormulaTerm, data, contrasts::AbstractDict, ::Type{M}) wh f, modelcols(f, data) end -modelmatrix(obj::LinPredModel) = obj.pp.X +modelframe(obj::LinPredModel) = obj.fr + 
+modelmatrix(obj::LinPredModel; weighted::Bool=isweighted(obj)) = modelmatrix(obj.pp; weighted=weighted) + +function modelmatrix(pp::LinPred; weighted::Bool=isweighted(pp)) + Z = if weighted + Diagonal(sqrt.(pp.wts))*pp.X + else + pp.X + end + return Z +end + +function leverage(x::LinPredModel) + h = vec(leverage(x.pp)) + hasfield(typeof(x.rr), :wrkwt) ? x.rr.wrkwt.*h : x.rr.wts.*h +end + +function leverage(pp::DensePredChol{T,<:CholeskyPivoted}) where T + X = modelmatrix(pp; weighted=false) + rnk = rank(pp.chol) + A = GLM.inverse(pp) + p = pp.chol.p[1:rnk] + diag(X[:,p]*A[p,p]*X[:,p]') + # sum(x->x^2, view(X, :, p)/view(pp.chol.U, p, p), dims=2) +end + +function leverage(pp::DensePredChol{T,<:Cholesky}) where T + X = modelmatrix(pp; weighted=false) + @show X/pp.chol.U + sum(x -> x^2, X/pp.chol.U, dims=2) +end + +function leverage(pp::DensePredQR{T,<:QRPivoted}) where T + X = modelmatrix(pp; weighted=false) + ch = pp.qr + rnk = rank(ch.R) + p = invperm(ch.p)[1:rnk] + sum(x -> x^2, view(X, :, 1:rnk)/view(ch.R, p, p), dims=2) +end + +function leverage(pp::DensePredQR{T,<:QRCompactWY}) where T + X = modelmatrix(pp; weighted=false) + sum(x -> x^2, X/pp.qr.R, dims=2) +end + response(obj::LinPredModel) = obj.rr.y fitted(m::LinPredModel) = m.rr.mu predict(mm::LinPredModel) = fitted(mm) -residuals(obj::LinPredModel) = residuals(obj.rr) function StatsModels.formula(obj::LinPredModel) obj.formula === nothing && throw(ArgumentError("model was fitted without a formula")) return obj.formula end +residuals(obj::LinPredModel; weighted::Bool=false) = residuals(obj.rr; weighted=weighted) + """ nobs(obj::LinearModel) nobs(obj::GLM) @@ -355,18 +487,19 @@ end For linear and generalized linear models, returns the number of rows, or, when prior weights are specified, the sum of weights. 
""" -function nobs(obj::LinPredModel) - if isempty(obj.rr.wts) - oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y)) - else - sum(obj.rr.wts) - end -end +nobs(obj::LinPredModel) = nobs(obj.rr) + +weights(obj::RegressionModel) = weights(obj.model) +weights(m::LinPredModel) = weights(m.rr) +weights(pp::LinPred) = pp.wts + +isweighted(m::LinPredModel) = isweighted(m.pp) +isweighted(pp::LinPred) = weights(pp) isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights} coef(x::LinPred) = x.beta0 coef(obj::LinPredModel) = coef(obj.pp) coefnames(x::LinPredModel) = - x.formula === nothing ? ["x$i" for i in 1:length(coef(x))] : coefnames(formula(x).rhs) + x.formula === nothing ? ["x$i" for i in 1:length(coef(x))] : StatsModels.vectorize(coefnames(formula(x).rhs)) dof_residual(obj::LinPredModel) = nobs(obj) - linpred_rank(obj) diff --git a/src/lm.jl b/src/lm.jl index f26fb8e5..0dd86cdf 100644 --- a/src/lm.jl +++ b/src/lm.jl @@ -7,39 +7,36 @@ Encapsulates the response for a linear model - `mu`: current value of the mean response vector or fitted value - `offset`: optional offset added to the linear predictor to form `mu` -- `wts`: optional vector of prior frequency (a.k.a. 
case) weights for observations +- `wts`: optional weights for observations (as `AbstractWeights`) - `y`: observed response vector Either or both `offset` and `wts` may be of length 0 """ -mutable struct LmResp{V<:FPVector} <: ModResp # response in a linear model +mutable struct LmResp{V<:FPVector, W<:AbstractWeights} <: ModResp # response in a linear model mu::V # mean response offset::V # offset added to linear predictor (may have length 0) - wts::V # prior weights (may have length 0) + wts::W # prior weights (may have length 0) y::V # response - function LmResp{V}(mu::V, off::V, wts::V, y::V) where V + function LmResp{V, W}(mu::V, off::V, wts::W, y::V) where {V, W} n = length(y) length(mu) == n || error("mismatched lengths of mu and y") ll = length(off) ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0") ll = length(wts) - ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0") - new{V}(mu, off, wts, y) + ll == n || ll == 0 || error("length of wts is $ll, must be $n or 0") + new{V,W}(mu, off, wts, y) end end -function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing) +function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights) # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly _y = convert(Vector{float(eltype(y))}, y) - _wts = if wts === nothing - similar(_y, 0) - else - convert(Vector{float(eltype(wts))}, wts) - end - return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y) + return LmResp{typeof(_y), typeof(wts)}(zero(_y), zero(_y), wts, _y) end -function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector +LmResp(y::AbstractVector{<:Real}) = LmResp(y, uweights(length(y))) + +function updateμ!(r::LmResp{V}, linPr::V) where {V<:FPVector} n = length(linPr) length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))") length(r.offset) == 0 ? 
copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset) @@ -52,25 +49,49 @@ function deviance(r::LmResp) y = r.y mu = r.mu wts = r.wts - v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) - if isempty(wts) - @inbounds @simd for i = eachindex(y,mu) + if wts isa UnitWeights + v = zero(eltype(y)) + zero(eltype(y)) + @inbounds @simd for i in eachindex(y,mu,wts) v += abs2(y[i] - mu[i]) end else - @inbounds @simd for i = eachindex(y,mu,wts) + v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts)) + @inbounds @simd for i in eachindex(y,mu,wts) v += abs2(y[i] - mu[i])*wts[i] end end - v + return v end -function loglikelihood(r::LmResp) - n = isempty(r.wts) ? length(r.y) : sum(r.wts) +weights(r::LmResp) = r.wts +isweighted(r::LmResp) = weights(r) isa Union{AnalyticWeights, FrequencyWeights, ProbabilityWeights} + +nobs(r::LmResp{<:Any,W}) where {W<:FrequencyWeights} = sum(r.wts) +nobs(r::LmResp{<:Any,W}) where {W<:AbstractWeights} = oftype(sum(one(eltype(r.wts))), length(r.y)) + +function loglikelihood(r::LmResp{T,<:Union{UnitWeights, FrequencyWeights}}) where T + n = nobs(r) -n/2 * (log(2π * deviance(r)/n) + 1) end -residuals(r::LmResp) = r.y - r.mu +function loglikelihood(r::LmResp{T,<:AnalyticWeights}) where T + N = length(r.y) + n = sum(log, weights(r)) + (n - N * (log(2π * deviance(r)/N) + 1))/2 +end + +function loglikelihood(r::LmResp{T,<:ProbabilityWeights}) where T + throw(ArgumentError("The `loglikelihood` for probability weighted models is not currently supported.")) +end + +function residuals(r::LmResp; weighted::Bool=false) + wts = weights(r) + if weighted && isweighted(r) + sqrt.(wts) .* (r.y .- r.mu) + else + r.y .- r.mu + end +end """ LinearModel @@ -93,11 +114,7 @@ end LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp) function StatsBase.fit!(obj::LinearModel) - if isempty(obj.rr.wts) - delbeta!(obj.pp, obj.rr.y) - else - delbeta!(obj.pp, obj.rr.y, obj.rr.wts) - end + delbeta!(obj.pp, obj.rr.y) obj.pp.beta0 .= obj.pp.delbeta 
updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y)))) return obj @@ -126,9 +143,23 @@ Fit a linear model to data. $FIT_LM_DOC """ + +function convert_weights(wts) + if wts isa Union{FrequencyWeights, AnalyticWeights, ProbabilityWeights, UnitWeights} + wts + elseif wts isa AbstractVector + Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " * + "`AnalyticWeights`, `ProbabilityWeights`, or `FrequencyWeights`. Proceeding " * + "by coercing `wts` to `FrequencyWeights`", :fit) + fweights(wts) + else + throw(ArgumentError("`wts` should be an `AbstractVector` coercible to `AbstractWeights`")) + end +end + function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real}, allowrankdeficient_dep::Union{Bool,Nothing}=nothing; - wts::AbstractVector{<:Real}=similar(y, 0), + wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}=uweights(length(y)), dropcollinear::Bool=true, method::Symbol=:cholesky) if allowrankdeficient_dep !== nothing @@ -136,11 +167,13 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{< "argument `dropcollinear` instead. 
Proceeding with positional argument value: $allowrankdeficient_dep" dropcollinear = allowrankdeficient_dep end + # For backward compatibility accept wts as AbstractArray and coerce them to FrequencyWeights + _wts = convert_weights(wts) if method === :cholesky - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), nothing)) + fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts), nothing)) elseif method === :qr - fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear), nothing)) + fit!(LinearModel(LmResp(y, _wts), qrpred(X, dropcollinear, _wts), nothing)) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -148,17 +181,18 @@ end function fit(::Type{LinearModel}, f::FormulaTerm, data, allowrankdeficient_dep::Union{Bool,Nothing}=nothing; - wts::Union{AbstractVector{<:Real}, Nothing}=nothing, + wts::Union{AbstractWeights{<:Real}, AbstractVector{<:Real}}=uweights(0), dropcollinear::Bool=true, method::Symbol=:cholesky, contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}()) - f, (y, X) = modelframe(f, data, contrasts, LinearModel) - wts === nothing && (wts = similar(y, 0)) + f, (y, X) = modelframe(f, data, contrasts, LinearModel) + _wts = convert_weights(wts) + _wts = isempty(_wts) ? 
uweights(length(y)) : _wts if method === :cholesky - fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), f)) + fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts), f)) elseif method === :qr - fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear), f)) + fit!(LinearModel(LmResp(y, _wts), qrpred(X, dropcollinear, _wts), f)) else throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`.")) end @@ -195,14 +229,9 @@ For linear models, the deviance of the null model is equal to the total sum of s """ function nulldeviance(obj::LinearModel) y = obj.rr.y - wts = obj.rr.wts - + wts = obj.pp.wts if hasintercept(obj) - if isempty(wts) - m = mean(y) - else - m = mean(y, weights(wts)) - end + m = mean(y, wts) else @warn("Starting from GLM.jl 1.8, null model is defined as having no predictor at all " * "when a model without an intercept is passed.") @@ -210,29 +239,39 @@ function nulldeviance(obj::LinearModel) end v = zero(eltype(y))*zero(eltype(wts)) - if isempty(wts) - @inbounds @simd for yi in y - v += abs2(yi - m) + if wts isa UnitWeights + @inbounds @simd for i = eachindex(y,wts) + v += abs2(y[i] - m) end else @inbounds @simd for i = eachindex(y,wts) v += abs2(y[i] - m)*wts[i] end end - v + return v +end + +function nullloglikelihood(m::LinearModel) + wts = weights(m) + if wts isa Union{UnitWeights, FrequencyWeights} + n = nobs(m) + -n/2 * (log(2π * nulldeviance(m)/n) + 1) + else + N = length(m.rr.y) + n = sum(log, wts) + 0.5*(n - N * (log(2π * nulldeviance(m)/N) + 1)) + end end loglikelihood(obj::LinearModel) = loglikelihood(obj.rr) -function nullloglikelihood(obj::LinearModel) - r = obj.rr - n = isempty(r.wts) ? 
length(r.y) : sum(r.wts) - -n/2 * (log(2π * nulldeviance(obj)/n) + 1) -end r2(obj::LinearModel) = 1 - deviance(obj)/nulldeviance(obj) adjr2(obj::LinearModel) = 1 - (1 - r²(obj))*(nobs(obj)-hasintercept(obj))/dof_residual(obj) +working_residuals(x::LinearModel) = residuals(x) +working_weights(x::LinearModel) = x.pp.wts + function dispersion(x::LinearModel, sqr::Bool=false) dofr = dof_residual(x) ssqr = deviance(x.rr)/dofr @@ -316,8 +355,7 @@ function StatsModels.predict!(res::Union{AbstractVector, prediction, lower, upper = res length(prediction) == length(lower) == length(upper) == size(newx, 1) || throw(DimensionMismatch("length of vectors in `res` must equal the number of rows in `newx`")) - length(mm.rr.wts) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression") - + mm.rr.wts isa UnitWeights || error("prediction with confidence intervals not yet implemented for weighted regression") dev = deviance(mm) dofr = dof_residual(mm) ret = diag(newx*vcov(mm)*newx') @@ -339,28 +377,44 @@ function confint(obj::LinearModel; level::Real=0.95) quantile(TDist(dof_residual(obj)), (1. - level)/2.) * [1. -1.] end +function momentmatrix(m::LinearModel) + X = modelmatrix(m; weighted=false) + r = residuals(m; weighted=false) + mm = X .* r + if isweighted(m) + mm .* weights(m) + else + mm + end +end + +invloglikhessian(m::LinearModel) = inverse(m.pp) + +function varstruct(x::LinearModel) + wrkwt = working_weights(x) + wrkres = working_residuals(x) + r = wrkwt .* wrkres + return r, one(eltype(r)) +end + """ cooksdistance(obj::LinearModel) Compute [Cook's distance](https://en.wikipedia.org/wiki/Cook%27s_distance) for each observation in linear model `obj`, giving an estimate of the influence of each data point. -Currently only implemented for linear models without weights. 
""" function StatsBase.cooksdistance(obj::LinearModel) - u = residuals(obj) - mse = dispersion(obj,true) + u = residuals(obj; weighted=isweighted(obj)) + mse = GLM.dispersion(obj,true) k = dof(obj)-1 - d_res = dof_residual(obj) - X = modelmatrix(obj) - XtX = crossmodelmatrix(obj) - k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) - wts = obj.rr.wts - if isempty(wts) - hii = diag(X * inv(XtX) * X') - else - throw(ArgumentError("Weighted models are not currently supported.")) - end + hii = leverage(obj) D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end + +## To remove when https://github.com/JuliaStats/StatsAPI.jl/pull/16 is merged +function crossmodelmatrix(model::RegressionModel; weighted::Bool=false) + x = weighted ? modelmatrix(model; weighted=weighted) : modelmatrix(model) + return Symmetric(x' * x) +end diff --git a/test/analytic_weights.jl b/test/analytic_weights.jl new file mode 100644 index 00000000..ced37571 --- /dev/null +++ b/test/analytic_weights.jl @@ -0,0 +1,777 @@ +rng = StableRNG(123) + +x1 = rand(rng, 25) +x2 = ifelse.(randn(rng, 25) .> 0, 1, 0) +y = ifelse.(0.004 .- 0.01 .* x1 .+ 1.5 .* x2 .+ randn(rng, 25) .> 0, 1, 0) +w = rand(rng, 25) * 6 +w = floor.(w) .+ 1 +df = DataFrame(y=y, x1=x1, x2=x2, w=w) + + +clotting = DataFrame( + u=log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), + lot1=[118, 58, 42, 35, 27, 25, 21, 19, 18], + w=[1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7] +) + +quine.aweights = log.(3 .+ 3 .* quine.Days) +quine.pweights = 1.0 ./ (quine.aweights ./ sum(quine.aweights)) +quine.fweights = floor.(quine.aweights) + +dobson = DataFrame( + Counts=[18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome=categorical(repeat(string.('A':'C'), outer=3)), + Treatment=categorical(repeat(string.('a':'c'), inner=3)), + w=[1, 2, 1, 2, 3, 4, 3, 2, 1] +) + +itr = Iterators.product((:qr, :cholesky), (true, false)) + + +@testset "Linear model ftest with $dmethod method with dropcollinear=$drop" for (dmethod, 
drop) ∈ itr + model_0 = lm(@formula(y ~ x1), df; wts=aweights(df.w), method=dmethod, dropcollinear=drop) + model_1 = lm(@formula(y ~ x1 + x2), df; wts=aweights(df.w), method=dmethod, dropcollinear=drop) + X = hcat(ones(length(df.y)), df.x1, df.x2) + model_2 = lm(X, y; wts=aweights(df.w), method=dmethod, dropcollinear=drop) + @test ftest(model_1).fstat ≈ 1.551275 rtol = 1e-05 + @test ftest(model_2) === ftest(model_1) + @test ftest(model_0, model_1).fstat[2] ≈ 1.7860438 rtol = 1e-05 + @test ftest(model_0, model_2).fstat[2] ≈ 1.7860438 rtol = 1e-05 +end + + +@testset "GLM: Binomial with LogitLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), LogitLink(), wts=aweights(df.w), + method=dmethod, dropcollinear=drop, atol=1e-08, rtol=1e-08) + @test deviance(model) ≈ 39.58120350785813 rtol = 1e-06 + @test loglikelihood(model) ≈ -19.79060175392906 rtol = 1e-06 + @test coef(model) ≈ [0.6333582770515337, 1.8861277804531265, 18.61281712203539] rtol = 1e-06 + @test stderror(model) ≈ [0.9021013750843575, 2.063002891039618, 2337.217357530545] rtol = 1e-07 + @test aic(model) ≈ 45.58120350785812 rtol = 1e-07 + @test bic(model) ≈ 49.237830982462725 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1.095702695280752 0.1983501744547734 0.0; + 0.6292210259386299 0.2312324009639611 0.0; + -0.869357286789858 -0.5816508224007081 -0.0; + 0.3287258318630744 0.0140466407925222 0.0; + 2.08484144507e-8 9.1314170785019e-9 2.085e-8; + 0.5274699949608909 0.2549210423027525 0.0; + -0.8883810924640739 -0.678699816121261 -0.0; + 0.169878909537636 0.15705018022658265 0.0; + 0.3346238606654708 0.1723463870596023 0.0; + 0.17263445723951362 0.085461258206114 0.0; + 1.90303118479143e-8 1.3346718776025414e-8 1.90e-8; + 1.60927355387381e-8 1.1161427644959331e-8 1.61e-8; + 2.2803477805569965e-9 1.9982067289774133e-9 2.28e-9; + 6.018177387787048e-9 5.682479294453922e-9 6.02e-9; + 1.4765287153134531e-8 1.0914686191045351e-8 1.48e-8; 
+ 2.0721163707495327e-8 1.1594416669570361e-8 2.07e-8; + 0.8473483434776684 0.4295002258959193 0.0; + 0.5426271690329769 0.35055009398999054 0.0; + -4.541414638754089 -1.2097095921684677 -0.0; + 1.2385948537889822 0.31353946330992544 0.0; + 3.067144219729067e-8 1.078462894246146e-8 3.07e-8; + 1.7613428517425065e-8 9.289130102493344e-9 1.76e-8; + 1.8334365002191494e-8 1.3220774075528698e-8 1.83e-8; + 1.6687848932140258e-8 3.1458514759844027e-9 1.67e-8; + 0.4123258762224241 0.2630623634882926 0.0] rtol = 1e-07 +end + + + +@testset "GLM: Binomial with ProbitLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), ProbitLink(), + wts=aweights(df.w), method=dmethod, dropcollinear=drop, rtol=1e-09) + @test deviance(model) ≈ 39.595360462143866 rtol = 1e-06 + @test loglikelihood(model) ≈ -19.797680231071933 rtol = 1e-06 + @test coef(model) ≈ [0.42120722997197313, 1.0416447141541567, 4.916910225354065] rtol = 1e-07 + @test stderror(model) ≈ [0.5216506352923727, 1.1455457218079563, 325.2782732702344] rtol = 1e-07 + @test aic(model) ≈ 45.595360462143866 rtol = 1e-07 + @test bic(model) ≈ 49.25198793674846 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1.8176341588673794 0.32903820904987535 0.0; + 1.0975399310212473 0.40333489019264895 0.0; + -1.6205390909958874 -1.0842353418245372 -0.0; + 0.5269343763678408 0.02251620404797942 0.0; + 2.1411260807372573e-7 9.377938695085409e-8 2.14e-7; + 0.9490575664910063 0.45867015444762754 0.0; + -1.7015508605205314 -1.2999401562600912 -0.0; + 0.33388087355781 0.3086672236664311 0.0; + 0.6070565699014323 0.31266152495891864 0.0; + 0.3115689943654785 0.15423965008066182 0.0; + 6.62696579317951e-8 4.647756142241091e-8 6.63e-8; + 5.7919749595281985e-8 4.0171361342870654e-8 5.79e-8; + 3.7134640590626956e-9 3.2540075395089014e-9 3.71e-9; + 7.229998935730756e-9 6.826704599068171e-9 7.23e-9; + 4.373923181354305e-8 3.233259092972413e-8 4.37e-8; + 1.3039185369174096e-7 
7.296006649823636e-8 1.30e-7; + 1.5339832954887218 0.7775387501543354 0.0; + 1.016200760890786 0.6564899300523522 0.0; + -7.736929921295398 -2.060908127581581 -0.0; + 2.0943898311662204 0.5301764831468441 0.0; + 4.4180333118496e-7 1.553459717259102e-7 4.42e-7; + 1.2636718773015955e-7 6.664467660855266e-8 1.26e-7; + 5.869289625690482e-8 4.232301043195289e-8 5.86e-8; + 4.453972209837739e-7 8.396249934481249e-8 4.45e-7; + 0.7707735136764122 0.49175061259680825 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with CauchitLink link - AnalyticWeights - method $dmethod" for (dmethod, drop) ∈ itr + model = glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CauchitLink(), wts=aweights(df.w), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-08) + @test deviance(model) ≈ 39.627559015619845 rtol = 1e-07 + @test loglikelihood(model) ≈ -19.813779507809922 rtol = 1e-07 + @test aic(model) ≈ 45.627559015619845 rtol = 1e-07 + @test bic(model) ≈ 49.28418649022444 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 1.003054020887253 0.1815783979426737 0.0; + 0.4264622162277366 0.15672057689370572 0.0; + -0.41221991029044563 -0.27579920646405165 -0.0; + 0.39009720364187195 0.016669074233287458 0.0; + 0.0 0.0 0.0; + 0.311923278855423 0.15074944190941555 0.0; + -0.37849361968132017 -0.28915918208960645 -0.0; + 0.08167834727345773 0.07551025135974303 0.0; + 0.1919243490466221 0.0988496997230555 0.0; + 0.10090666946769812 0.049953010957329104 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.4897032828746528 0.2482186602895976 0.0; + 0.28232074585439737 0.18238593576314036 0.0; + -3.7015013060867705 -0.985979478109429 -0.0; + 0.9986020018483154 0.25278737010890295 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 0.0 0.0; + 0.0 4.7109001281144596e-17 0.0; + 0.21554272008110664 0.1375154474822352 0.0] rtol = 1e-07 +end + +@testset "GLM: Binomial with CloglogLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = 
glm(@formula(y ~ 1 + x1 + x2), df, Binomial(), CloglogLink(), wts=aweights(df.w), + method=dmethod, dropcollinear=drop, rtol=5e-10, atol=1e-10) + @test deviance(model) ≈ 39.61484762863061 rtol = 1e-07 + @test loglikelihood(model) ≈ -19.807423814315307 rtol = 1e-07 + @test coef(model) ≈ [0.12095167614339054, 0.8666201161364425, 2.71457411130009] rtol=1e-07 + @test stderror(model) ≈ [0.46442064138194333, 0.9661962332997427, 462.67067410332123] rtol=1e-07 + @test aic(model) ≈ 45.61484762863061 rtol = 1e-07 + @test bic(model) ≈ 49.27147510323522 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 1.9242952153533148 0.3483465846271526 0.0; + 1.2514530854268051 0.45989642702311906 0.0; + -2.0153062620933504 -1.348357645985017 -0.0; + 0.5261952160086218 0.02248461930760421 0.0; + 1.5917320997016487e-7 6.971642718424943e-8 1.5e-7; + 1.1286597324859617 0.5454701085543264 0.0; + -2.188084897309478 -1.6716393787069568 -0.0; + 0.44263603677181956 0.4092095336554625 0.0; + 0.7298024393361812 0.3758811862272388 0.0; + 0.37203439025955143 0.1841725435114846 0.0; + 1.79124103038821e-9 1.2562689715087133e-9 0.0; + 1.7546756596715808e-9 1.216989204144432e-9 0.0; + 5.7157964501561416e-12 5.00859694540867e-12 0.0; + 3.1204806380028555e-12 2.9464180717205265e-12 0.0; + 6.677005273849182e-10 4.93572637661486e-10 6.7e-10; + 2.403535247525871e-8 1.3448853323683519e-8 2.4e-8; + 1.839078160139044 0.9321839020515984 0.0; + 1.2724386238625014 0.8220257013418844 0.0; + -8.529708800751662 -2.2720829026754306 -0.0; + 2.2835873542705203 0.5780701827470168 0.0; + 7.796454326518419e-7 2.7413731130574843e-7 7.79e-7; + 3.441301868580375e-8 1.8149050735676883e-8 3.44e-8; + 1.181434863206281e-9 8.519238822580661e-10 1.18e-9; + 3.115862778487023e-6 5.873759740112369e-7 3.11e-6; + 0.9629196988432743 0.6143391585021523 0.0] rtol = 1e-05 +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), 
InverseLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-07, rtol=1e-08) + @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 + @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 + @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 1e-04 + @test aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [1900.1063511093867 3058.103199132267; + -1643.317155973023 -3783.877586404854; + -420.13783432322964 -1137.7543467296691; + -981.2887166533023 -2939.6782781526754; + 313.30087123532877 1065.5981029180723; + -186.60227446859759 -688.353296378139; + 324.34628373045786 1327.9854430687467; + 430.8197010892654 1887.863404915401; + 262.77277766267576 1210.113361381432] rtol = 1e-07 +end + +@testset "GLM: Gamma with IdentityLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), IdentityLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, rtol=1e-10, atol=1e-10, minstepfac=0.00001) + @test deviance(model) ≈ 1.3435348802929383 rtol = 1e-07 + @test loglikelihood(model) ≈ -101.19916126647321 rtol = 1e-07 + @test coef(model) ≈ [86.45700434128152, -15.320695650698417] rtol = 1e-05 + @test stderror(model) ≈ [16.07962739541372, 3.766841480457265] rtol = 1e-05 + @test GLM.aic(model) ≈ 208.39832253294642 rtol = 1e-07 + @test GLM.bic(model) ≈ 208.9899962649551 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 0.26061914480947884 0.4194503323625281; + 0.06148544891860896 0.14157547811603585; + -0.019061929106842457 -0.051620660951180786; + -0.1795782998461795 -0.5379685084791557; + -0.1764962075232437 -0.6002984389013568; + -0.2277661940139623 -0.8402020334398342; + -0.3204523427685144 -1.3120423070655995; + -0.054878647210950426 -0.2404796937532563; + 
0.6561290267416002 3.0215858321118008] rtol = 1e-04 +end + +@testset "GLM: Gamma with LogLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), LogLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, rtol=1e-09) + @test deviance(model) ≈ 0.41206342934199663 rtol = 1e-07 + @test loglikelihood(model) ≈ -81.79777246247532 rtol = 1e-07 + @test coef(model) ≈ [5.325107090308856, -0.5495682740033511] rtol = 1e-07 + @test stderror(model) ≈ [0.20287310816341905, 0.053062600599660774] rtol = 1e-07 + @test GLM.aic(model) ≈ 169.59554492495064 rtol = 1e-07 + @test GLM.bic(model) ≈ 170.18721865695932 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 14.39716447431257 23.171342336508012; + 0.0374983950207553 0.0863432453859933; + -2.5490869750808054 -6.903055495494598; + -12.821435846444906 -38.40958915849704; + -8.713283462827741 -29.635596899449876; + -6.520303896525519 -24.05261507847203; + -4.123729229896082 -16.88396834850135; + 3.70269025008355 16.225287295813413; + 16.590486289982852 76.40201283367323] rtol = 1e-07 +end + +@testset "GLM: Gamma with InverseLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(lot1 ~ 1 + u), clotting, Gamma(), InverseLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, rtol=1e-09) + @test deviance(model) ≈ 0.03933389380881642 rtol = 1e-07 + @test loglikelihood(model) ≈ -43.359078787690514 rtol = 1e-07 + @test coef(model) ≈ [-0.017217012596343607, 0.015649040406186487] rtol = 1e-07 + @test stderror(model) ≈ [0.0009144223353860925, 0.0003450913537314497] rtol = 1e-07 + @test GLM.aic(model) ≈ 92.71815757538103 rtol = 1e-07 + @test GLM.bic(model) ≈ 93.30983130738969 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 1900.1063511093867 3058.103199132267; + -1643.317155973023 -3783.877586404854; + -420.13783432322964 -1137.7543467296691; + -981.2887166533023 
-2939.6782781526754; + 313.30087123532877 1065.5981029180723; + -186.60227446859759 -688.353296378139; + 324.34628373045786 1327.9854430687467; + 430.8197010892654 1887.863404915401; + 262.77277766267576 1210.113361381432] rtol = 1e-07 +end + +@testset "GLM: InverseGaussian with InverseSquareLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(lot1 ~ 1 + u), clotting, InverseGaussian(), InverseSquareLink(), + wts=aweights(clotting.w), method=dmethod, dropcollinear=drop, atol=1e-09, rtol=1e-09) + @test deviance(model) ≈ 0.021377370485120707 rtol = 1e-07 + @test loglikelihood(model) ≈ -86.82546665077861 rtol = 1e-07 + @test coef(model) ≈ [-0.0012633718975150973, 0.0008126490405747128] rtol = 1e-07 + @test stderror(model) ≈ [0.00016779409928094252, 9.025235597677238e-5] rtol = 1e-07 + @test GLM.aic(model) ≈ 179.65093330155722 rtol = 1e-07 + @test GLM.bic(model) ≈ 180.2426070335659 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ 28815.030725087538 46376.00289690935; + -21039.070620903 -48444.250382140235; + -6195.618377983015 -16778.045594449453; + -15686.073415243622 -46991.276375382586; + -1716.0787284468345 -5836.722477919495; + -2086.203482054124 -7695.75316205041; + 3418.087237993986 13994.826896081435; + 6065.271775021221 26578.18246467872; + 8424.676595366931 38797.069483575455] rtol = 1e-06 +end + +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights - method: dmethod" for (dmethod, drop) ∈ itr + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), + LogLink(), wts=aweights(quine.aweights), method=dmethod, + dropcollinear=drop, atol=1e-08, rtol=1e-08) + @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 + @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, + -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, + 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ 
[0.1950707397084349, 0.13200639191036218, 0.1373161597645507, + 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, + 0.16126722793064027] rtol = 1e-07 + ## Tests below are broken because dof(model)==8 instead of 7 + @test_broken aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ + -3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 + -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 + -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0 + -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0 + -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045 + -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0 + 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0 + -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 + -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 + 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 + 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 + -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + 
-2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0 + 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0 + 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0 + 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0 + -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0 + 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0 + 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0 + 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0 + -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677 + -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0 + 10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0 + -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 + -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047 + 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347 + 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183 + 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826 + 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137 + 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 
+ -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0 + 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0 + 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0 + -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554 + -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471 + -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402 + -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927 + 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548 + 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142 + 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032 + 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504 + -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0 + -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0 + -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0 + -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 -0.0 + -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0 + -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0 + -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0 + -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0 + 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0 + 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0 + -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597 + -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244 + 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0 + -2.758113321833414 -2.758113321833414 
-2.758113321833414 -0.0 -0.0 -0.0 -0.0 + -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0 + -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217 + 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873 + -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 -0.0 -0.0 -0.0 + -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0 + 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936 + 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854 + 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725 + -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0 + -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0 + -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0 + -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0 + -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0 + 0.3128048727055356 0.3128048727055356 
0.3128048727055356 0.0 0.3128048727055356 0.0 0.0 + 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0 + -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0 + 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0 + -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0 + -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0 + 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0 + 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0 + 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0 + 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876 + -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0 + 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0 + 12.426707944681816 12.426707944681816 0.0 0.0 0.0 0.0 0.0 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116 + -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -0.18952790670731487 -0.18952790670731487 -0.0 
-0.18952790670731487 -0.0 -0.0 -0.18952790670731487 + 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307 + -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0 + 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0 + 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0 + -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601 + -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536 + -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715 + -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027 + -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088 + -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149 + -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0 + -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0 + -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.8041614268127484 -3.8041614268127484 -0.0 
-0.0 -0.0 -3.8041614268127484 -0.0 + -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0 + -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0 + ] rtol = 1e-04 +end + +@testset "GLM: NegativeBinomial with LogLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, + NegativeBinomial(2), LogLink(), wts=aweights(quine.aweights), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-08) + @test deviance(model) ≈ 624.7631999565588 rtol = 1e-07 + @test loglikelihood(model) ≈ -2004.5939464322778 rtol = 1e-07 + @test coef(model) ≈ [3.02411915515531, -0.4641576651688563, 0.0718560942992554, + -0.47848540911607984, 0.09677889908013552, 0.3562972562034356, + 0.3480161821981514] rtol = 1e-07 + @test stderror(model) ≈ [0.1950707397084349, 0.13200639191036218, 0.1373161597645507, + 0.2088476016141468, 0.20252412726336674, 0.21060778935484836, + 0.16126722793064027] rtol = 1e-07 + @test_broken GLM.aic(model) ≈ 4023.1878928645556 rtol = 1e-07 + @test_broken GLM.bic(model) ≈ 4044.073139216514 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ + -3.866780529709063 -0.0 -3.866780529709063 -0.0 -0.0 -0.0 -3.866780529709063 + -4.370085797122667 -0.0 -4.370085797122667 -0.0 -0.0 -0.0 -4.370085797122667 + -3.956562495375882 -0.0 -3.956562495375882 -0.0 -0.0 -0.0 -3.956562495375882 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -4.102299119258251 -0.0 -4.102299119258251 -0.0 -0.0 -0.0 -0.0 + -2.8243330916399567 -0.0 -2.8243330916399567 -0.0 -0.0 -0.0 -0.0 + -0.7247974261272416 -0.0 -0.7247974261272416 -0.0 -0.0 -0.0 -0.0 + -0.0382123316932152 -0.0 -0.0382123316932152 -0.0 -0.0 -0.0 -0.0 + -3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + 
-3.813241073891047 -0.0 -3.813241073891047 -3.813241073891047 -0.0 -0.0 -3.813241073891047 + -1.593192001014045 -0.0 -1.593192001014045 -1.593192001014045 -0.0 -0.0 -1.593192001014045 + -2.7127578570401822 -0.0 -2.7127578570401822 -2.7127578570401822 -0.0 -0.0 -0.0 + 0.14484002662039835 0.0 0.14484002662039835 0.14484002662039835 0.0 0.0 0.0 + -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 -0.0 -4.754224412754331 + -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 -0.0 -0.6279394841753847 + 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 0.0 5.160032033317412 + 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 0.0 6.363463014626628 + -2.991376095035898 -0.0 -2.991376095035898 -0.0 -2.991376095035898 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.492994950052581 -0.0 -2.492994950052581 -0.0 -2.492994950052581 -0.0 -0.0 + -2.226530220466526 -0.0 -2.226530220466526 -0.0 -2.226530220466526 -0.0 -0.0 + 5.713017320814697 0.0 5.713017320814697 0.0 5.713017320814697 0.0 0.0 + 6.908456992944485 0.0 6.908456992944485 0.0 6.908456992944485 0.0 0.0 + 8.12839634400043 0.0 8.12839634400043 0.0 8.12839634400043 0.0 0.0 + -4.628254089687799 -0.0 -4.628254089687799 -0.0 -0.0 -4.628254089687799 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -2.183958840253964 -0.0 -2.183958840253964 -0.0 -0.0 -2.183958840253964 -0.0 + -0.9503472567532946 -0.0 -0.9503472567532946 -0.0 -0.0 -0.9503472567532946 -0.0 + 0.6731546773300909 0.0 0.6731546773300909 0.0 0.0 0.6731546773300909 0.0 + 1.2423198758199778 0.0 1.2423198758199778 0.0 0.0 1.2423198758199778 0.0 + 1.8236065476231822 0.0 1.8236065476231822 0.0 0.0 1.8236065476231822 0.0 + -4.171836641319677 -0.0 -0.0 -0.0 -0.0 -0.0 -4.171836641319677 + -3.9882995353410657 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -3.0399730926465205 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.309672612863431 0.0 0.0 0.0 0.0 0.0 0.0 + 
10.661189363296968 0.0 0.0 0.0 0.0 0.0 0.0 + -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 -0.0 -0.0 -3.7634043246260425 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 -0.0 -0.0 -3.6605640772207546 + -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 -0.0 -0.0 -3.0720683496525485 + -1.885334027349047 -0.0 -0.0 -1.885334027349047 -0.0 -0.0 -1.885334027349047 + 2.106807550276347 0.0 0.0 2.106807550276347 0.0 0.0 2.106807550276347 + 3.0150038937286183 0.0 0.0 3.0150038937286183 0.0 0.0 3.0150038937286183 + 6.387064937752826 0.0 0.0 6.387064937752826 0.0 0.0 6.387064937752826 + 17.72394862307137 0.0 0.0 17.72394862307137 0.0 0.0 17.72394862307137 + 18.296957173355864 0.0 0.0 18.296957173355864 0.0 0.0 18.296957173355864 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -3.0375213985118954 -0.0 -0.0 -3.0375213985118954 -0.0 -0.0 -0.0 + -0.8508688349707806 -0.0 -0.0 -0.8508688349707806 -0.0 -0.0 -0.0 + 2.2977798382338515 0.0 0.0 2.2977798382338515 0.0 0.0 0.0 + 3.4686807301080997 0.0 0.0 3.4686807301080997 0.0 0.0 0.0 + -4.658715933989554 -0.0 -0.0 -0.0 -4.658715933989554 -0.0 -4.658715933989554 + -4.187227633826471 -0.0 -0.0 -0.0 -4.187227633826471 -0.0 -4.187227633826471 + -4.04126740785402 -0.0 -0.0 -0.0 -4.04126740785402 -0.0 -4.04126740785402 + -2.940568463040927 -0.0 -0.0 -0.0 -2.940568463040927 -0.0 -2.940568463040927 + 4.342318636532548 0.0 0.0 0.0 4.342318636532548 0.0 4.342318636532548 + 4.653011109293142 0.0 0.0 0.0 4.653011109293142 0.0 4.653011109293142 + 8.523536317826032 0.0 0.0 0.0 8.523536317826032 0.0 8.523536317826032 + 15.787943104351504 0.0 0.0 0.0 15.787943104351504 0.0 15.787943104351504 + -3.6818016272511183 -0.0 -0.0 -0.0 -3.6818016272511183 -0.0 -0.0 + -2.057196136670586 -0.0 -0.0 -0.0 -0.0 -2.057196136670586 -0.0 + -3.834339745304657 -0.0 -0.0 -0.0 -0.0 -3.834339745304657 -0.0 + -4.1780090350069425 -0.0 -0.0 -0.0 -0.0 -4.1780090350069425 
-0.0 + -4.491340364181187 -0.0 -0.0 -0.0 -0.0 -4.491340364181187 -0.0 + -4.3190736545666875 -0.0 -0.0 -0.0 -0.0 -4.3190736545666875 -0.0 + -3.731819061288569 -0.0 -0.0 -0.0 -0.0 -3.731819061288569 -0.0 + -2.238272513055515 -0.0 -0.0 -0.0 -0.0 -2.238272513055515 -0.0 + 1.9859737921268132 0.0 0.0 0.0 0.0 1.9859737921268132 0.0 + 3.2559592797891495 0.0 0.0 0.0 0.0 3.2559592797891495 0.0 + -3.8426774654770597 -3.8426774654770597 -3.8426774654770597 -0.0 -0.0 -0.0 -3.8426774654770597 + -0.9876822943882244 -0.9876822943882244 -0.9876822943882244 -0.0 -0.0 -0.0 -0.9876822943882244 + 23.20842027925341 23.20842027925341 23.20842027925341 0.0 0.0 0.0 23.20842027925341 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -1.920845416870046 -1.920845416870046 -1.920845416870046 -0.0 -0.0 -0.0 -0.0 + -3.2888901738202923 -3.2888901738202923 -3.2888901738202923 -0.0 -0.0 -0.0 -0.0 + -2.758113321833414 -2.758113321833414 -2.758113321833414 -0.0 -0.0 -0.0 -0.0 + -1.306843142455193 -1.306843142455193 -1.306843142455193 -0.0 -0.0 -0.0 -0.0 + -0.8751747276035264 -0.8751747276035264 -0.8751747276035264 -0.0 -0.0 -0.0 -0.0 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -1.8877508012966644 -0.0 -0.0 -1.8877508012966644 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -2.9308943363443847 -0.0 -0.0 -2.9308943363443847 + -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.6051770620747217 -0.0 -0.0 -0.6051770620747217 + 2.697606708942873 2.697606708942873 2.697606708942873 2.697606708942873 0.0 0.0 2.697606708942873 + -2.628558928820721 -2.628558928820721 -2.628558928820721 -2.628558928820721 
-0.0 -0.0 -0.0 + -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -2.3542975772061085 -0.0 -0.0 -0.0 + 0.11268811798135936 0.11268811798135936 0.11268811798135936 0.0 0.11268811798135936 0.0 0.11268811798135936 + 3.1826005245112854 3.1826005245112854 3.1826005245112854 0.0 3.1826005245112854 0.0 3.1826005245112854 + 5.692953263520725 5.692953263520725 5.692953263520725 0.0 5.692953263520725 0.0 5.692953263520725 + -2.7839804243079254 -2.7839804243079254 -2.7839804243079254 -0.0 -2.7839804243079254 -0.0 -0.0 + -1.9433894208611948 -1.9433894208611948 -1.9433894208611948 -0.0 -1.9433894208611948 -0.0 -0.0 + -2.962526696741388 -2.962526696741388 -2.962526696741388 -0.0 -2.962526696741388 -0.0 -0.0 + -3.4432739212266052 -3.4432739212266052 -3.4432739212266052 -0.0 -3.4432739212266052 -0.0 -0.0 + -3.0516553688541084 -3.0516553688541084 -3.0516553688541084 -0.0 -3.0516553688541084 -0.0 -0.0 + 0.3128048727055356 0.3128048727055356 0.3128048727055356 0.0 0.3128048727055356 0.0 0.0 + 5.983398649554576 5.983398649554576 5.983398649554576 0.0 5.983398649554576 0.0 0.0 + -1.9961184031161041 -1.9961184031161041 -1.9961184031161041 -0.0 -0.0 -1.9961184031161041 -0.0 + 4.212201806010905 4.212201806010905 4.212201806010905 0.0 0.0 4.212201806010905 0.0 + -3.152192412974143 -3.152192412974143 -3.152192412974143 -0.0 -0.0 -3.152192412974143 -0.0 + -2.03792823060008 -2.03792823060008 -2.03792823060008 -0.0 -0.0 -2.03792823060008 -0.0 + 2.9007973162738843 2.9007973162738843 2.9007973162738843 0.0 0.0 2.9007973162738843 0.0 + 9.364366020386104 9.364366020386104 9.364366020386104 0.0 0.0 9.364366020386104 0.0 + 24.059031354439128 24.059031354439128 24.059031354439128 0.0 0.0 24.059031354439128 0.0 + 2.864621620127876 2.864621620127876 0.0 0.0 0.0 0.0 2.864621620127876 + -1.374372490365048 -1.374372490365048 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.9287032240778311 -0.9287032240778311 -0.0 -0.0 -0.0 -0.0 -0.0 + 3.919550403175515 3.919550403175515 0.0 0.0 0.0 0.0 0.0 + 12.426707944681816 
12.426707944681816 0.0 0.0 0.0 0.0 0.0 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -1.8681224147832116 -1.8681224147832116 -0.0 -1.8681224147832116 -0.0 -0.0 -1.8681224147832116 + -2.778411659017331 -2.778411659017331 -0.0 -2.778411659017331 -0.0 -0.0 -2.778411659017331 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.750339462985501 -2.750339462985501 -0.0 -2.750339462985501 -0.0 -0.0 -2.750339462985501 + -2.0720837572297617 -2.0720837572297617 -0.0 -2.0720837572297617 -0.0 -0.0 -2.0720837572297617 + -0.18952790670731487 -0.18952790670731487 -0.0 -0.18952790670731487 -0.0 -0.0 -0.18952790670731487 + 2.1145280030507307 2.1145280030507307 0.0 2.1145280030507307 0.0 0.0 2.1145280030507307 + -1.7407825357737137 -1.7407825357737137 -0.0 -1.7407825357737137 -0.0 -0.0 -0.0 + 4.548120970699322 4.548120970699322 0.0 4.548120970699322 0.0 0.0 0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -1.2257166987183963 -1.2257166987183963 -0.0 -1.2257166987183963 -0.0 -0.0 -0.0 + -0.6449075179371568 -0.6449075179371568 -0.0 -0.6449075179371568 -0.0 -0.0 -0.0 + 17.819813171012125 17.819813171012125 0.0 17.819813171012125 0.0 0.0 0.0 + -1.999110422648601 -1.999110422648601 -0.0 -0.0 -1.999110422648601 -0.0 -1.999110422648601 + -3.9564518053768536 -3.9564518053768536 -0.0 -0.0 -3.9564518053768536 -0.0 -3.9564518053768536 + -2.1216196203872557 -2.1216196203872557 -0.0 -0.0 -2.1216196203872557 -0.0 -2.1216196203872557 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 -3.601990642806918 -0.0 -3.601990642806918 + -3.601990642806918 -3.601990642806918 -0.0 -0.0 
-3.601990642806918 -0.0 -3.601990642806918 + -3.8495441274063715 -3.8495441274063715 -0.0 -0.0 -3.8495441274063715 -0.0 -3.8495441274063715 + -3.6199500530041027 -3.6199500530041027 -0.0 -0.0 -3.6199500530041027 -0.0 -3.6199500530041027 + -3.209822061567088 -3.209822061567088 -0.0 -0.0 -3.209822061567088 -0.0 -3.209822061567088 + -2.702521155801149 -2.702521155801149 -0.0 -0.0 -2.702521155801149 -0.0 -2.702521155801149 + -2.921923505820458 -2.921923505820458 -0.0 -0.0 -2.921923505820458 -0.0 -0.0 + -3.058405902935942 -3.058405902935942 -0.0 -0.0 -0.0 -3.058405902935942 -0.0 + -3.1473667781351766 -3.1473667781351766 -0.0 -0.0 -0.0 -3.1473667781351766 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.7560337640183694 -3.7560337640183694 -0.0 -0.0 -0.0 -3.7560337640183694 -0.0 + -3.8041614268127484 -3.8041614268127484 -0.0 -0.0 -0.0 -3.8041614268127484 -0.0 + -1.3131162740760067 -1.3131162740760067 -0.0 -0.0 -0.0 -1.3131162740760067 -0.0 + -0.18645252170591944 -0.18645252170591944 -0.0 -0.0 -0.0 -0.18645252170591944 -0.0 + 1.4593378269316923 1.4593378269316923 0.0 0.0 0.0 1.4593378269316923 0.0 + 8.572921389223637 8.572921389223637 0.0 0.0 0.0 8.572921389223637 0.0 + ] rtol = 1e-04 +end + +@testset "GLM: NegativeBinomial with SqrtLink link - AnalyticWeights with $dmethod method" for (dmethod, drop) ∈ itr + model = glm(@formula(Days ~ Eth + Sex + Age + Lrn), quine, NegativeBinomial(2), + SqrtLink(), wts=aweights(quine.aweights), + method=dmethod, dropcollinear=drop, rtol=1e-08, atol=1e-09) + @test deviance(model) ≈ 626.6464732988984 rtol = 1e-07 + @test loglikelihood(model) ≈ -2005.5355831034462 rtol = 1e-07 + @test coef(model) ≈ [4.733877229152363, -1.007977895471349, 0.02522392818548873, + -0.9859743168046422, 0.2132095063819721, 0.7456070470961186, + 0.5840284357554036] rtol = 1e-07 + @test stderror(model) ≈ [0.42307979153860564, 0.286636744566765, 
0.29612422536777805, + 0.42042723748229144, 0.45565954626859695, 0.4766324296069839, + 0.3235019638755972] rtol = 1e-06 + @test_broken GLM.aic(model) ≈ 4025.0711662068925 rtol = 1e-07 + @test_broken GLM.bic(model) ≈ 4045.956412558851 rtol = 1e-07 + @test GLM.momentmatrix(model) ≈ [ + -1.4294351675636041 -0.0 -1.4294351675636041 -0.0 -0.0 -0.0 -1.4294351675636041 + -1.5410055711037194 -0.0 -1.5410055711037194 -0.0 -0.0 -0.0 -1.5410055711037194 + -1.3571249039047424 -0.0 -1.3571249039047424 -0.0 -0.0 -0.0 -1.3571249039047424 + -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0 + -1.7394058711709879 -0.0 -1.7394058711709879 -0.0 -0.0 -0.0 -0.0 + -1.229734152157926 -0.0 -1.229734152157926 -0.0 -0.0 -0.0 -0.0 + -0.3742348640443611 -0.0 -0.3742348640443611 -0.0 -0.0 -0.0 -0.0 + -0.09370480172054219 -0.0 -0.09370480172054219 -0.0 -0.0 -0.0 -0.0 + -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827 + -1.7293809063089827 -0.0 -1.7293809063089827 -1.7293809063089827 -0.0 -0.0 -1.7293809063089827 + -0.6748210571645206 -0.0 -0.6748210571645206 -0.6748210571645206 -0.0 -0.0 -0.6748210571645206 + -1.5016227445218024 -0.0 -1.5016227445218024 -1.5016227445218024 -0.0 -0.0 -0.0 + -0.058778966482651636 -0.0 -0.058778966482651636 -0.058778966482651636 -0.0 -0.0 -0.0 + -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 -0.0 -1.6582836355486288 + 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 0.0 0.11341508381030255 + 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 0.0 2.4651888863431344 + 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 0.0 2.9517152556309942 + -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -1.2288386266845785 -0.0 -0.0 + -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0 + -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -1.0325293533841053 -0.0 -0.0 + -0.9274622643228648 -0.0 -0.9274622643228648 -0.0 
-0.9274622643228648 -0.0 -0.0 + 2.212861910664014 0.0 2.212861910664014 0.0 2.212861910664014 0.0 0.0 + 2.6862849076558937 0.0 2.6862849076558937 0.0 2.6862849076558937 0.0 0.0 + 3.1694781034873523 0.0 3.1694781034873523 0.0 3.1694781034873523 0.0 0.0 + -1.6534192741665588 -0.0 -1.6534192741665588 -0.0 -0.0 -1.6534192741665588 -0.0 + -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0 + -0.702446330017668 -0.0 -0.702446330017668 -0.0 -0.0 -0.702446330017668 -0.0 + -0.23123674216762394 -0.0 -0.23123674216762394 -0.0 -0.0 -0.23123674216762394 -0.0 + 0.3871584524726257 0.0 0.3871584524726257 0.0 0.0 0.3871584524726257 0.0 + 0.6036586921589513 0.0 0.6036586921589513 0.0 0.0 0.6036586921589513 0.0 + 0.8246522973739006 0.0 0.8246522973739006 0.0 0.0 0.8246522973739006 0.0 + -1.560441651521342 -0.0 -0.0 -0.0 -0.0 -0.0 -1.560441651521342 + -1.7419685003857353 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + -1.4153955925789807 -0.0 -0.0 -0.0 -0.0 -0.0 -0.0 + 0.23770439864218734 0.0 0.0 0.0 0.0 0.0 0.0 + 3.853247675936175 0.0 0.0 0.0 0.0 0.0 0.0 + -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 -0.0 -0.0 -1.7692672149493731 + -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 + -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 -0.0 -0.0 -1.7282440188407218 + -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 -0.0 -0.0 -1.4769837741682987 + -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 -0.0 -0.0 -0.9582774689727417 + 0.8052632685284861 0.0 0.0 0.8052632685284861 0.0 0.0 0.8052632685284861 + 1.2077994352773953 0.0 0.0 1.2077994352773953 0.0 0.0 1.2077994352773953 + 2.7042310768987665 0.0 0.0 2.7042310768987665 0.0 0.0 2.7042310768987665 + 7.744950633464035 0.0 0.0 7.744950633464035 0.0 0.0 7.744950633464035 + 7.999933021719232 0.0 0.0 7.999933021719232 0.0 0.0 7.999933021719232 + -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0 + -1.7392669278461907 -0.0 -0.0 -1.7392669278461907 -0.0 -0.0 -0.0 + -0.7262212443523531 
-0.0 -0.0 -0.7262212443523531 -0.0 -0.0 -0.0 + 0.7835691668207807 0.0 0.0 0.7835691668207807 0.0 0.0 0.0 + 1.3489349925379315 0.0 0.0 1.3489349925379315 0.0 0.0 0.0 + -1.6522100272913283 -0.0 -0.0 -0.0 -1.6522100272913283 -0.0 -1.6522100272913283 + -1.4590418232851277 -0.0 -0.0 -0.0 -1.4590418232851277 -0.0 -1.4590418232851277 + -1.4015111702500997 -0.0 -0.0 -0.0 -1.4015111702500997 -0.0 -1.4015111702500997 + -0.9738202253475602 -0.0 -0.0 -0.0 -0.9738202253475602 -0.0 -0.9738202253475602 + 1.8091899079230156 0.0 0.0 0.0 1.8091899079230156 0.0 1.8091899079230156 + 1.9274245415701026 0.0 0.0 0.0 1.9274245415701026 0.0 1.9274245415701026 + 3.399094699981504 0.0 0.0 0.0 3.399094699981504 0.0 3.399094699981504 + 6.157344170373497 0.0 0.0 0.0 6.157344170373497 0.0 6.157344170373497 + -1.5082203700488148 -0.0 -0.0 -0.0 -1.5082203700488148 -0.0 -0.0 + -0.7518968254083281 -0.0 -0.0 -0.0 -0.0 -0.7518968254083281 -0.0 + -1.403623374340758 -0.0 -0.0 -0.0 -0.0 -1.403623374340758 -0.0 + -1.5307566638052945 -0.0 -0.0 -0.0 -0.0 -1.5307566638052945 -0.0 + -1.6487615285777935 -0.0 -0.0 -0.0 -0.0 -1.6487615285777935 -0.0 + -1.5960112869101046 -0.0 -0.0 -0.0 -0.0 -1.5960112869101046 -0.0 + -1.3904968459197917 -0.0 -0.0 -0.0 -0.0 -1.3904968459197917 -0.0 + -0.8618818687491527 -0.0 -0.0 -0.0 -0.0 -0.8618818687491527 -0.0 + 0.6414580291618693 0.0 0.0 0.0 0.0 0.6414580291618693 0.0 + 1.0942097094869556 0.0 0.0 0.0 0.0 1.0942097094869556 0.0 + -1.7282583231217719 -1.7282583231217719 -1.7282583231217719 -0.0 -0.0 -0.0 -1.7282583231217719 + -0.31744768418403196 -0.31744768418403196 -0.31744768418403196 -0.0 -0.0 -0.0 -0.31744768418403196 + 11.375280355235768 11.375280355235768 11.375280355235768 0.0 0.0 0.0 11.375280355235768 + -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0 + -1.0256901959548927 -1.0256901959548927 -1.0256901959548927 -0.0 -0.0 -0.0 -0.0 + -1.7598032161223125 -1.7598032161223125 -1.7598032161223125 -0.0 -0.0 -0.0 -0.0 + -1.491030768800334 
-1.491030768800334 -1.491030768800334 -0.0 -0.0 -0.0 -0.0 + -0.7301769140610584 -0.7301769140610584 -0.7301769140610584 -0.0 -0.0 -0.0 -0.0 + -0.5034045168716083 -0.5034045168716083 -0.5034045168716083 -0.0 -0.0 -0.0 -0.0 + -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734 + -1.113506915630734 -1.113506915630734 -1.113506915630734 -1.113506915630734 -0.0 -0.0 -1.113506915630734 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -1.6237007081122545 -0.0 -0.0 -1.6237007081122545 + -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.07026189294137537 -0.0 -0.0 -0.07026189294137537 + 2.0844355685058127 2.0844355685058127 2.0844355685058127 2.0844355685058127 0.0 0.0 2.0844355685058127 + -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -1.7313903927438412 -0.0 -0.0 -0.0 + -1.480745232754872 -1.480745232754872 -1.480745232754872 -1.480745232754872 -0.0 -0.0 -0.0 + 0.21539031393949493 0.21539031393949493 0.21539031393949493 0.0 0.21539031393949493 0.0 0.21539031393949493 + 1.6360787089859707 1.6360787089859707 1.6360787089859707 0.0 1.6360787089859707 0.0 1.6360787089859707 + 2.7952193074887086 2.7952193074887086 2.7952193074887086 0.0 2.7952193074887086 0.0 2.7952193074887086 + -1.448364418208364 -1.448364418208364 -1.448364418208364 -0.0 -1.448364418208364 -0.0 -0.0 + -0.9833503482488964 -0.9833503482488964 -0.9833503482488964 -0.0 -0.9833503482488964 -0.0 -0.0 + -1.5017276161539084 -1.5017276161539084 -1.5017276161539084 -0.0 -1.5017276161539084 -0.0 -0.0 + -1.7640356839137032 -1.7640356839137032 -1.7640356839137032 -0.0 -1.7640356839137032 -0.0 -0.0 + -1.5776069676233444 -1.5776069676233444 -1.5776069676233444 -0.0 -1.5776069676233444 -0.0 -0.0 + 
0.06361165131312438 0.06361165131312438 0.06361165131312438 0.0 0.06361165131312438 0.0 0.0 + 2.8475608847598153 2.8475608847598153 2.8475608847598153 0.0 2.8475608847598153 0.0 0.0 + -0.8892460264142052 -0.8892460264142052 -0.8892460264142052 -0.0 -0.0 -0.8892460264142052 -0.0 + 1.7743695974457907 1.7743695974457907 1.7743695974457907 0.0 0.0 1.7743695974457907 0.0 + -1.4305200814192562 -1.4305200814192562 -1.4305200814192562 -0.0 -0.0 -1.4305200814192562 -0.0 + -0.9478929479399423 -0.9478929479399423 -0.9478929479399423 -0.0 -0.0 -0.9478929479399423 -0.0 + 1.2024302930353608 1.2024302930353608 1.2024302930353608 0.0 0.0 1.2024302930353608 0.0 + 4.02280289664674 4.02280289664674 4.02280289664674 0.0 0.0 4.02280289664674 0.0 + 10.440933185941839 10.440933185941839 10.440933185941839 0.0 0.0 10.440933185941839 0.0 + 1.262517093518885 1.262517093518885 0.0 0.0 0.0 0.0 1.262517093518885 + -0.9176184029771589 -0.9176184029771589 -0.0 -0.0 -0.0 -0.0 -0.0 + -0.6982138187318754 -0.6982138187318754 -0.0 -0.0 -0.0 -0.0 -0.0 + 1.7133696015602422 1.7133696015602422 0.0 0.0 0.0 0.0 0.0 + 5.976953806399672 5.976953806399672 0.0 0.0 0.0 0.0 0.0 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 -0.0 -1.1866621929181271 + -1.1194589330024307 -1.1194589330024307 -0.0 -1.1194589330024307 -0.0 -0.0 -1.1194589330024307 + -1.6605118926433484 -1.6605118926433484 -0.0 -1.6605118926433484 -0.0 -0.0 -1.6605118926433484 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.6123792319065735 -1.6123792319065735 -0.0 -1.6123792319065735 -0.0 -0.0 -1.6123792319065735 + -1.1866621929181271 -1.1866621929181271 -0.0 -1.1866621929181271 -0.0 
-0.0 -1.1866621929181271 + -0.016084003453250676 -0.016084003453250676 -0.0 -0.016084003453250676 -0.0 -0.0 -0.016084003453250676 + 1.4107278812149031 1.4107278812149031 0.0 1.4107278812149031 0.0 0.0 1.4107278812149031 + -1.1128985115655265 -1.1128985115655265 -0.0 -1.1128985115655265 -0.0 -0.0 -0.0 + 3.7957001151581404 3.7957001151581404 0.0 3.7957001151581404 0.0 0.0 0.0 + -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0 + -0.7046958095802869 -0.7046958095802869 -0.0 -0.7046958095802869 -0.0 -0.0 -0.0 + -0.2475403067755282 -0.2475403067755282 -0.0 -0.2475403067755282 -0.0 -0.0 -0.0 + 14.054845699928913 14.054845699928913 0.0 14.054845699928913 0.0 0.0 0.0 + -0.8850373634971601 -0.8850373634971601 -0.0 -0.0 -0.8850373634971601 -0.0 -0.8850373634971601 + -1.7594068536637126 -1.7594068536637126 -0.0 -0.0 -1.7594068536637126 -0.0 -1.7594068536637126 + -0.9681259531090506 -0.9681259531090506 -0.0 -0.0 -0.9681259531090506 -0.0 -0.9681259531090506 + -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888 + -1.5970364987524888 -1.5970364987524888 -0.0 -0.0 -1.5970364987524888 -0.0 -1.5970364987524888 + -1.7082890535667876 -1.7082890535667876 -0.0 -0.0 -1.7082890535667876 -0.0 -1.7082890535667876 + -1.6168827210404924 -1.6168827210404924 -0.0 -0.0 -1.6168827210404924 -0.0 -1.6168827210404924 + -1.4399676449006795 -1.4399676449006795 -0.0 -0.0 -1.4399676449006795 -0.0 -1.4399676449006795 + -1.2202487676722908 -1.2202487676722908 -0.0 -0.0 -1.2202487676722908 -0.0 -1.2202487676722908 + -1.5079358693315765 -1.5079358693315765 -0.0 -0.0 -1.5079358693315765 -0.0 -0.0 + -1.3842064467607202 -1.3842064467607202 -0.0 -0.0 -0.0 -1.3842064467607202 -0.0 + -1.5208922216041325 -1.5208922216041325 -0.0 -0.0 -0.0 -1.5208922216041325 -0.0 + 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 + -1.717557999730545 -1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0 + -1.717557999730545 
-1.717557999730545 -0.0 -0.0 -0.0 -1.717557999730545 -0.0 + -1.76269912849327 -1.76269912849327 -0.0 -0.0 -0.0 -1.76269912849327 -0.0 + -0.7863622513628796 -0.7863622513628796 -0.0 -0.0 -0.0 -0.7863622513628796 -0.0 + -0.32795262618891574 -0.32795262618891574 -0.0 -0.0 -0.0 -0.32795262618891574 -0.0 + 0.3453894161447818 0.3453894161447818 0.0 0.0 0.0 0.3453894161447818 0.0 + 3.2758115948278728 3.2758115948278728 0.0 0.0 0.0 3.2758115948278728 0.0] rtol = 1e-04 +end diff --git a/test/probability_weights.jl b/test/probability_weights.jl new file mode 100644 index 00000000..ed21f416 --- /dev/null +++ b/test/probability_weights.jl @@ -0,0 +1,285 @@ +rng = StableRNG(123) +x1 = rand(rng, 50) +x2 = ifelse.(randn(rng, 50) .> 0, 1, 0) +y = ifelse.(0.004 .- 0.01 .* x1 .+ 1.5 .* x2 .+ randn(rng, 50) .> 0, 1, 0) +w = rand(rng, 50) * 6 +w = floor.(w) .+ 1 +df = DataFrame(y = y, x1 = x1, x2 = x2, w = w) +df.pweights = size(df, 1) .* (df.w ./ sum(df.w)) + +clotting = DataFrame( + u = log.([5, 10, 15, 20, 30, 40, 60, 80, 100]), + lot1 = [118, 58, 42, 35, 27, 25, 21, 19, 18], + w = [1.5, 2.0, 1.1, 4.5, 2.4, 3.5, 5.6, 5.4, 6.7], +) + +clotting.pweights = (clotting.w ./ sum(clotting.w)) + +quine = RDatasets.dataset("MASS", "quine") +quine.aweights = log.(3 .+ 3 .* quine.Days) +quine.pweights = size(quine, 1) .* (quine.aweights ./ sum(quine.aweights)) + +dobson = DataFrame( + Counts = [18.0, 17, 15, 20, 10, 20, 25, 13, 12], + Outcome = categorical(repeat(string.('A':'C'), outer = 3)), + Treatment = categorical(repeat(string.('a':'c'), inner = 3)), + w = [1, 2, 1, 2, 3, 4, 3, 2, 1], +) + +dobson.pweights = size(dobson, 1) .* (dobson.w ./ sum(dobson.w)) + +itr = Iterators.product((:qr, :cholesky), (true, false)) + + +@testset "Linear Model ftest/loglikelihood with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model_1 = lm(@formula(y ~ x1 + x2), df; wts=pweights(df.pweights), method = dmethod) + X = hcat(ones(length(df.y)), df.x1, df.x2) + model_2 = lm(X, y; 
wts=pweights(df.pweights)) + @test_throws ArgumentError ftest(model_1) + @test_throws ArgumentError ftest(model_2) + @test_throws ArgumentError loglikelihood(model_1) + @test_throws ArgumentError loglikelihood(model_2) +end + +@testset "GLM: Binomial with LogitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + LogitLink(), + wts = pweights(df.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-07, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.311214978934785 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.5241460813701, 0.14468927249342, 2.487500063309] rtol = 1e-06 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [1.07077535201799, 1.4966446912323, 0.7679252464101] rtol = 1e-05 +end + +@testset "GLM: Binomial with ProbitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + ProbitLink(), + wts = pweights(df.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-09, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.280413566179 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.379823362118, 0.17460125170132, 1.4927538978259] rtol = 1e-07 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [0.6250657160317, 0.851366312489, 0.4423686640689] rtol = 1e-05 +end + +@testset "GLM: Binomial with CauchitLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + CauchitLink(), + wts = pweights(df.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-07, + ) + @test_throws ArgumentError loglikelihood(model) 
+ @test deviance(model) ≈ 47.17915872474391 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.007674579802284, -0.5378132620063, 2.994759904353] rtol = 1e-06 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [1.020489214335, 1.5748610330014, 1.5057621596148] rtol = 1e-03 +end + +@testset "GLM: Binomial with CloglogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(y ~ 1 + x1 + x2), + df, + Binomial(), + CloglogLink(), + wts = pweights(df.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-09, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 47.063354817529856 rtol = 1e-07 + @test nulldeviance(model) ≈ 60.82748267747685 rtol = 1e-07 + @test coef(model) ≈ [-0.9897210433718, 0.449902058467, 1.5467108410611] rtol = 1e-07 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test dof_residual(model) == 47.0 + @test stderror(model) ≈ [0.647026270959, 0.74668663622095, 0.49056337945919] rtol = 1e-04 +end + +@testset "GLM: Gamma with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(lot1 ~ 1 + u), + clotting, + Gamma(), + LogLink(), + wts = pweights(clotting.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-12, + atol = 1e-9, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 0.012601328117859285 rtol = 1e-07 + @test nulldeviance(model) ≈ 0.28335799805430917 rtol = 1e-07 + @test coef(model) ≈ [5.325098274654255, -0.5495659110653159] rtol = 1e-5 + ## Test broken because of https://github.com/JuliaStats/GLM.jl/issues/509 + @test dof_residual(model) == 7.0 + @test stderror(model) ≈ [0.2651749940925478, 0.06706321966020713] rtol = 1e-07 +end + +@testset "GLM: NegativeBinomial(2) with LogLink link - ProbabilityWeights with $dmethod method with 
dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + LogLink(), + wts = pweights(quine.pweights), + method = dmethod, + dropcollinear = drop, + atol = 1e-09, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [ + 3.0241191551553044, + -0.46415766516885565, + 0.07185609429925505, + -0.47848540911607695, + 0.09677889908013788, + 0.3562972562034377, + 0.34801618219815034, + ] rtol = 1e-04 + @test dof_residual(model) == 139.0 + @test_broken stderror(model) ≈ [ + 0.20080246284436692, + 0.14068933863735536, + 0.1440710375321996, + 0.2533527583247213, + 0.2401168459633955, + 0.23210823521812646, + 0.19039099362430775, + ] rtol = 1e-05 + @test stderror(model) ≈ [ + 0.20080246284436692, + 0.14068933863735536, + 0.1440710375321996, + 0.2533527583247213, + 0.2401168459633955, + 0.23210823521812646, + 0.19039099362430775, + ] rtol = 1e-04 +end + +@testset "GLM: with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + LogLink(), + wts = pweights(quine.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-09, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 178.46174895746665 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [ + 3.0241191551553044, + -0.46415766516885565, + 0.07185609429925505, + -0.47848540911607695, + 0.09677889908013788, + 0.3562972562034377, + 0.34801618219815034, + ] rtol = 1e-04 + @test dof_residual(model) == 139.0 + @test stderror(model) ≈ [ + 0.20080246284436692, + 0.14068933863735536, + 0.1440710375321996, + 0.2533527583247213, + 0.2401168459633955, + 0.23210823521812646, + 0.19039099362430775, 
+ ] rtol = 1e-04 +end + +@testset "GLM: NegativeBinomial(2) with SqrtLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(Days ~ Eth + Sex + Age + Lrn), + quine, + NegativeBinomial(2), + SqrtLink(), + wts = pweights(quine.pweights), + method = dmethod, + dropcollinear = drop, + rtol = 1e-08, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 178.99970038364276 rtol = 1e-07 + @test nulldeviance(model) ≈ 214.52243528092782 rtol = 1e-07 + @test coef(model) ≈ [ + 4.733877229152367, + -1.0079778954713488, + 0.025223928185488836, + -0.985974316804644, + 0.2132095063819702, + 0.7456070470961171, + 0.5840284357554048, + ] rtol = 1e-07 + + @test dof_residual(model) == 139.0 + @test stderror(model) ≈ [ + 0.4156607040373307, + 0.30174203746555045, + 0.30609799754882105, + 0.526030598769091, + 0.5384102946567921, + 0.5328456049279787, + 0.4065359817407846, + ] rtol = 1e-04 +end + +@testset "GLM: Poisson with LogLink link - ProbabilityWeights with $dmethod method with dropcollinear=$drop" for (dmethod, drop) ∈ itr + model = glm( + @formula(Counts ~ 1 + Outcome + Treatment), + dobson, + Poisson(), + LogLink(), + wts = pweights(dobson.pweights), + method = dmethod, + dropcollinear = drop, + ) + @test_throws ArgumentError loglikelihood(model) + @test deviance(model) ≈ 4.837327189925912 rtol = 1e-07 + @test nulldeviance(model) ≈ 12.722836814903907 rtol = 1e-07 + @test coef(model) ≈ [ + 3.1097109912423444, + -0.5376892683400354, + -0.19731134600684794, + -0.05011966661241072, + 0.010415729161988225, + ] rtol = 1e-07 + @test dof_residual(model) == 4.0 + @test stderror(model) ≈ [ + 0.15474638805584298, + 0.13467582259453692, + 0.1482320418486368, + 0.17141304156534284, + 0.17488650070332398, + ] rtol = 1e-06 +end diff --git a/test/runtests.jl b/test/runtests.jl index fb0115f5..0cb468b3 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -58,7 +58,7 @@ 
linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y @test lm1.model === lm1 @test lm1.mf.f == formula(lm1) - @testset "low level constructors" begin +@testset "low level constructors" begin X = [ones(10) randn(10)] y = X*ones(2) + randn(10)*0.1 r = GLM.LmResp(y) @@ -87,6 +87,7 @@ end # linear regression t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod) @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base)) + @test GLM.momentmatrix(t_lm_base) == modelmatrix(t_lm_base).*residuals(t_lm_base) # linear regression, no intercept t_lm_noint = lm(@formula(Y ~ XA +0), st_df; method=dmethod) @@ -98,9 +99,10 @@ end # linear regression, two full collinear variables (XC = 2 XA) hence should get the same results as the original # after pivoting - t_lm_colli = lm(@formula(Y ~ XA + XC), st_df; dropcollinear=true, method=dmethod) - # Currently fails as the collinear variable is not dropped from `modelmatrix(obj)` - @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli)) + t_lm_colli = lm(@formula(Y ~ XA + XC), st_df, dropcollinear=true) + t_lm_colli_b = lm(@formula(Y ~ XC), st_df, dropcollinear=true) + @test isapprox(cooksdistance(t_lm_colli), cooksdistance(t_lm_colli_b)) + end @testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr) @@ -209,9 +211,9 @@ end @test isapprox(coef(m2p_dep_pos_kw), coef(m2p)) end -@testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr) +@testset "saturated linear model" for dmethod in (:cholesky, :qr) df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df1; method=dmethod) ct = coeftable(model) @test dof_residual(model) == 0 @@ -524,7 +525,7 @@ end @testset "Poisson LogLink offset with weights with $dmethod" for dmethod in (:cholesky, :qr) gm7pw = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia, Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), 
rtol=1e-8) + wts=fweights(repeat(1:4, outer=18)), rtol=1e-8) @test GLM.cancancel(gm7pw.rr) test_show(gm7pw) @@ -620,10 +621,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink with $dmethod" for dmethod in (:cholesky, :qr) +@testset "Aggregated Binomial LogitLink" begin for distr in (Binomial, Bernoulli) - gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(); - method=dmethod, wts=Array(admit_agr.count)) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=fweights(Array(admit_agr.count))) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -636,8 +637,25 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], @test isapprox(coef(gm14), [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) end + end +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin + for distr in (Binomial, Bernoulli) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=aweights(admit_agr.count)) + @test dof(gm14) == 4 + @test nobs(gm14) == 8 + @test isapprox(deviance(gm14), 474.9667184280627) + @test isapprox(loglikelihood(gm14), -237.48335921403134) + @test isapprox(aic(gm14), 482.96671842822883) + @test isapprox(aicc(gm14), 496.3000517613874) + @test isapprox(bic(gm14), 483.28448459477346) + @test isapprox(coef(gm14), + [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) + end + +end # Logistic regression using aggregated data with proportions of successes and weights admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1:4)], [:count, :admit, :rank]) @@ -646,7 +664,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off 
error @testset "Binomial LogitLink aggregated with $dmethod" for dmethod in (:cholesky, :qr) gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - method=dmethod, wts=admit_agr2.count) + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -663,7 +681,7 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights with $dmethod" for dmethod in (:cholesky, :qr) gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - method=dmethod, wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -678,9 +696,9 @@ end end # Weighted Poisson example (weights are totally made up) -@testset "Poisson LogLink Weights with $dmethod" for dmethod in (:cholesky, :qr) - gm17 = glm(@formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - method=dmethod, wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) +@testset "Poisson LogLink Weights" begin + gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), + wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -771,7 +789,7 @@ end @testset "Weighted NegativeBinomial LogLink, θ to be estimated with Cholesky" begin halfn = round(Int, 0.5*size(quine, 1)) wts = vcat(fill(0.8, halfn), fill(1.2, size(quine, 1) - halfn)) - gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); wts=wts) + gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); wts=fweights(wts)) test_show(gm20a) @test dof(gm20a) == 8 @test isapprox(deviance(gm20a), 164.45910399188858, rtol = 1e-7) @@ -893,7 +911,7 @@ end # Poisson with categorical predictors, weights and offset nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, Poisson(), LogLink(); 
offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + wts=fweights(repeat(1:4, outer=18)), rtol=1e-8, dropcollinear=false) @test !hasintercept(nointglm3) @test GLM.cancancel(nointglm3.rr) test_show(nointglm3) @@ -948,7 +966,7 @@ end # Poisson with categorical predictors, weights and offset nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia, Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt), - wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false) + wts=fweights(repeat(1:4, outer=18)), rtol=1e-8, dropcollinear=false) @test !hasintercept(nointglm3) @test GLM.cancancel(nointglm3.rr) test_show(nointglm3) @@ -995,7 +1013,7 @@ end end end -@testset "Predict with $dmethod" for dmethod in (:cholesky, :qr) +@testset "Predict" for dmethod in (:cholesky, :qr) # Binomial GLM rng = StableRNG(123) X = rand(rng, 10, 2) @@ -1592,14 +1610,14 @@ end lm4 = lm(view(x, :, :), view(y, :); method=dmethod) @test coef(lm1) == coef(lm2) == coef(lm3) == coef(lm4) - lm5 = lm(x, y, wts=w, method=dmethod) - lm6 = lm(x, view(y, :), method=dmethod, wts=w) - lm7 = lm(view(x, :, :), y, method=dmethod, wts=w) - lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=w) - lm9 = lm(x, y, method=dmethod, wts=view(w, :)) - lm10 = lm(x, view(y, :), method=dmethod, wts=view(w, :)) - lm11 = lm(view(x, :, :), y, method=dmethod, wts=view(w, :)) - lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=view(w, :)) + lm5 = lm(x, y, wts=fweights(w), method=dmethod) + lm6 = lm(x, view(y, :), method=dmethod, wts=fweights(w)) + lm7 = lm(view(x, :, :), y, method=dmethod, wts=fweights(w)) + lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=fweights(w)) + lm9 = lm(x, y, method=dmethod, wts=fweights(view(w, :))) + lm10 = lm(x, view(y, :), method=dmethod, wts=fweights(view(w, :))) + lm11 = lm(view(x, :, :), y, method=dmethod, wts=fweights(view(w, :))) + lm12 = lm(view(x, :, :), view(y, :), method=dmethod, 
wts=fweights(view(w, :))) @test coef(lm5) == coef(lm6) == coef(lm7) == coef(lm8) == coef(lm9) == coef(lm10) == coef(lm11) == coef(lm12) @@ -1610,14 +1628,14 @@ end glm4 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), method=dmethod, wts=w) - glm6 = glm(x, view(y, :), Binomial(), method=dmethod, wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=w) - glm9 = glm(x, y, Binomial(), method=dmethod, wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), method=dmethod, wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=view(w, :)) + glm5 = glm(x, y, Binomial(), wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w)) + glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(), wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end @@ -1731,6 +1749,105 @@ end end end +@testset "momentmatrix" begin + @testset "Poisson" begin + dobson = DataFrame( + Counts = [18.,17,15,20,10,20,25,13,12], + Outcome = categorical(repeat(string.('A':'C'), outer = 3)), + Treatment = categorical(repeat(string.('a':'c'), inner = 3)), + Weights = [0.3, 0.2, .9, .8, .2, .3, .4, .8, .9] + ) + + f = @formula(Counts ~ 1 + Outcome + Treatment) + + gm_pois = fit(GeneralizedLinearModel, f, dobson, Poisson()) + + mm0_pois = [-2.9999999792805436 -0.0 -0.0 -0.0 -0.0; + 
3.666666776430482 3.666666776430482 0.0 0.0 0.0; + -0.6666666790442577 -0.0 -0.6666666790442577 -0.0 -0.0; + -1.0000000123284563 -0.0 -0.0 -1.0000000123284563 -0.0; + -3.3333334972350723 -3.3333334972350723 -0.0 -3.3333334972350723 -0.0; + 4.333333497138949 0.0 4.333333497138949 4.333333497138949 0.0; + 4.000000005907649 0.0 0.0 0.0 4.000000005907649; + -0.33333334610634496 -0.33333334610634496 -0.0 -0.0 -0.33333334610634496; + -3.6666667654825043 -0.0 -3.6666667654825043 -0.0 -3.6666667654825043] + + gm_poisw = fit(GeneralizedLinearModel, f, dobson, Poisson(), wts = fweights(dobson.Weights)) + + mm0_poisw = [-0.9624647521850039 -0.0 -0.0 -0.0 -0.0; + 0.6901050904949885 0.6901050904949885 0.0 0.0 0.0; + 0.2723596655008255 0.0 0.2723596655008255 0.0 0.0; + -0.9062167634177802 -0.0 -0.0 -0.9062167634177802 -0.0; + -0.7002548908882033 -0.7002548908882033 -0.0 -0.7002548908882033 -0.0; + 1.606471661159352 0.0 1.606471661159352 1.606471661159352 0.0; + 1.8686815106332157 0.0 0.0 0.0 1.8686815106332157; + 0.010149793505874801 0.010149793505874801 0.0 0.0 0.010149793505874801; + -1.8788313148033928 -0.0 -1.8788313148033928 -0.0 -1.8788313148033928] + @test mm0_pois ≈ GLM.momentmatrix(gm_pois) atol=1e-06 + @test mm0_poisw ≈ GLM.momentmatrix(gm_poisw) atol=1e-06 + end + @testset "Binomial" begin + f = @formula(admit ~ 1 + rank) + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(); rtol=1e-8) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), + wts=fweights(admit_agr.count); rtol=1e-08) + + mm0_bin = [-0.5 -0.0 -0.0 -0.0 + -0.5 -0.5 -0.0 -0.0 + -0.5 -0.0 -0.5 -0.0 + -0.5 -0.0 -0.0 -0.5 + 0.5 0.0 0.0 0.0 + 0.5 0.5 0.0 0.0 + 0.5 0.0 0.5 0.0 + 0.5 0.0 0.0 0.5] + + mm0_binw = [-15.1475 -0.0 -0.0 -0.0 + -34.6887 -34.6887 -0.0 -0.0 + -21.5207 -0.0 -21.5207 -0.0 + -9.85075 -0.0 -0.0 -9.85075 + 15.1475 0.0 0.0 0.0 + 34.6887 34.6887 0.0 0.0 + 21.5207 0.0 21.5207 0.0 + 9.85075 0.0 0.0 9.85075] + + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) + @test mm0_binw ≈ 
GLM.momentmatrix(gm_binw) atol=1e-03 + + end + + @testset "Binomial ProbitLink" begin + f = @formula(admit ~ 1 + rank) + gm_bin = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink()) + gm_binw = fit(GeneralizedLinearModel, f, admit_agr, Binomial(), ProbitLink(), + wts=fweights(admit_agr.count), rtol=1e-8) + + mm0_bin = [-0.7978846 0.0000000 0.0000000 0.0000000 + -0.7978846 -0.7978846 0.0000000 0.0000000 + -0.7978846 0.0000000 -0.7978846 0.0000000 + -0.7978846 0.0000000 0.0000000 -0.7978846 + 0.7978846 0.0000000 0.0000000 0.0000000 + 0.7978846 0.7978846 0.0000000 0.0000000 + 0.7978846 0.0000000 0.7978846 0.0000000 + 0.7978846 0.0000000 0.0000000 0.7978846] + + mm0_binw = [ -24.20695 0.00000 0.00000 0.00000 + -56.36158 -56.36158 0.00000 0.00000 + -36.86681 0.00000 -36.86681 0.00000 + -17.52584 0.00000 0.00000 -17.52584 + 24.20695 0.00000 0.00000 0.00000 + 56.36158 56.36158 0.00000 0.00000 + 36.86681 0.00000 36.86681 0.00000 + 17.52584 0.00000 0.00000 17.52584] + + @test mm0_bin ≈ GLM.momentmatrix(gm_bin) rtol=1e-06 + @test mm0_binw ≈ GLM.momentmatrix(gm_binw) rtol=1e-05 + end + +end + +include("analytic_weights.jl") +include("probability_weights.jl") + @testset "contrasts argument" begin # DummyCoding (default) m = lm(@formula(y~x), (y=1:25, x=repeat(1:5, 5)), @@ -2044,3 +2161,87 @@ end @test coef(ft) ≈ [9.648767705301294, -0.11274823562143056, 0.1907889126252095, -0.8123086879222496] @test_throws DomainError glm(@formula(Column1 ~ Column2 + Column3 + Column4), df, Gamma(), LogLink(), start = fill(NaN, 4)) end + +@testset "Testing various weighting specific weighting" begin + # Test probability weighting with collinear columns + form.CarbC = form.Carb + form.awts = [0.6, 0.3, 0.6, 0.3, 0.6, 0.3] + form.fwts = [6, 3, 6, 3, 6, 3] + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = aweights(form.awts), method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = aweights(form.awts), method=:qr) + @test coef(lm0) == 
coef(lm1)[1:2] + @test stderror(lm0) == stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = pweights(form.awts), method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = pweights(form.awts), method=:qr) + @test coef(lm0) == coef(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + lm0 = fit(LinearModel, @formula(OptDen ~ Carb), form; wts = fweights(form.fwts), method=:qr) + lm1 = fit(LinearModel, @formula(OptDen ~ Carb + CarbC), form; wts = fweights(form.fwts), method=:qr) + @test coef(lm0) ≈ coef(lm1)[1:2] + @test stderror(lm0) ≈ stderror(lm1)[1:2] + @test isnan(stderror(lm1)[3]) + ## Leverage with weights +end + +rng = StableRNG(123) +df = DataFrame(x_1 = randn(rng, 10), x_2 = randn(rng, 10), y = randn(rng, 10), ) +df.xx_1 = df.x_1 +df.xx_2 = df.x_2 +df.d = rand(rng, 0:1, 10) +df.w = rand(rng, 10) +frm0 = @formula(y ~ x_1 + x_2) +frm1 = @formula(y ~ x_1 + xx_2 + + x_2 + xx_1) +frmp0 = @formula(d ~ x_1 + x_2) +frmp1 = @formula(d ~ x_1 + xx_2 + + x_2 + xx_1) + +lev0 = [0.2346366962678214; 0.6633984457928059; 0.32460236947851806; + 0.1543698142163501; 0.3762092067703499; 0.4887705577596249; + 0.15170408132550545; 0.15279492673405848; 0.16851296355750492; + 0.28500093809746074] + +lev0_pr = [0.28923503046426724; 0.0006968399611682526; + 0.6040870179390236; 0.222176173808432; + 0.06247295465277078; 0.8226912702704338; + 0.21412449742521156; 0.2160509316358758; + 0.23269115629631995; 0.33577412754649705] + +@testset "Leverage unweighted" for method ∈ (:qr, :cholesky) + lm0 = fit(LinearModel, frm0, df, method=method) + lm1 = fit(LinearModel, frm1, df, method=method) + @test leverage(lm0) ≈ leverage(lm1) + @test lev0 ≈ leverage(lm1) + glm1 = fit(GeneralizedLinearModel, frm1, df, Normal(), IdentityLink(), method=method) + @test lev0 ≈ leverage(glm1) + probit0 = glm(frmp0, df, Binomial(), ProbitLink(), method=method) + probit = glm(frmp1, df, Binomial(), 
ProbitLink(), method=method) + @test leverage(probit) ≈ leverage(probit0) + @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 +end + + +lev0 = [0.4546669409864052, 0.39220506613766826, 0.31067464842874659, + 0.16105201633463462, 0.45458434896240396, 0.43751245519667181, + 0.12193399045053441, 0.12312180988271218, 0.22090225888834489, + 0.32334646473187784] + +lev0_pr = [0.28660353231987495, 2.6405172015486347e-05, 0.62180044570022475, + 0.25772930451725845, 0.058608663568656121, 0.78417628710278586, + 0.16615273053689983, 0.16787702318659792, 0.30440874803670093, + 0.35261685985898611] + +@testset "Leverage weighted" for method ∈ (:qr, :cholesky) + lm0 = fit(LinearModel, frm0, df, method=method, wts=df.w) + lm1 = fit(LinearModel, frm1, df, method=method, wts=df.w) + @test leverage(lm0) ≈ leverage(lm1) + @test lev0 ≈ leverage(lm1) + glm1 = fit(GeneralizedLinearModel, frm1, df, Normal(), IdentityLink(), method=method, wts=df.w) + @test lev0 ≈ leverage(glm1) + probit0 = glm(frmp0, df, Binomial(), ProbitLink(), method=method, wts=df.w) + probit = glm(frmp1, df, Binomial(), ProbitLink(), method=method, wts=df.w) + @test leverage(probit) ≈ leverage(probit0) + @test lev0_pr ≈ leverage(probit0) rtol = 1e-03 +end + +