diff --git a/src/GLM.jl b/src/GLM.jl
index 019f80e3..8625a48b 100644
--- a/src/GLM.jl
+++ b/src/GLM.jl
@@ -12,14 +12,14 @@ module GLM
 import Statistics: cor
 import StatsBase: coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
                   loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict,
-                  fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue
+                  fitted, fit, model_response, response, modelmatrix, r2, r², adjr2, adjr², PValue, weights
 import StatsFuns: xlogy
 import SpecialFunctions: erfc, erfcinv, digamma, trigamma
 import StatsModels: hasintercept
 export coef, coeftable, confint, deviance, nulldeviance, dof, dof_residual,
        loglikelihood, nullloglikelihood, nobs, stderror, vcov, residuals, predict, fitted,
        fit, fit!, model_response, response, modelmatrix, r2, r², adjr2, adjr²,
-       cooksdistance, hasintercept
+       cooksdistance, hasintercept, weights, AnalyticWeights, ProbabilityWeights, FrequencyWeights

 export
     # types
@@ -52,17 +52,17 @@ module GLM
     LinearModel,

     # functions
-    canonicallink,  # canonical link function for a distribution
-    deviance,       # deviance of fitted and observed responses
-    devresid,       # vector of squared deviance residuals
-    formula,        # extract the formula from a model
-    glm,            # general interface
-    linpred,        # linear predictor
-    lm,             # linear model
-    negbin,         # interface to fitting negative binomial regression
-    nobs,           # total number of observations
-    predict,        # make predictions
-    ftest           # compare models with an F test
+    canonicallink, # canonical link function for a distribution
+    deviance,      # deviance of fitted and observed responses
+    devresid,      # vector of squared deviance residuals
+    formula,       # extract the formula from a model
+    glm,           # general interface
+    linpred,       # linear predictor
+    lm,            # linear model
+    negbin,        # interface to fitting negative binomial regression
+    nobs,          # total number of observations
+    predict,       # make predictions
+    ftest          # compare models with an F test

 const FP = AbstractFloat
 const FPVector{T<:FP} = AbstractArray{T,1}
diff --git a/src/glmfit.jl b/src/glmfit.jl
index 0c108296..94058ef8 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -3,7 +3,7 @@

 The response vector and various derived vectors in a generalized linear model.
 """
-struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
+struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link,W<:AbstractWeights{<:Real}} <: ModResp
    "`y`: response vector"
    y::V
    d::D
@@ -17,15 +17,15 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
    mu::V
    "`offset:` offset added to `Xβ` to form `eta`. Can be of length 0"
    offset::V
-   "`wts:` prior case weights. Can be of length 0."
-   wts::V
+   "`wts`: prior case weights. Must have length `n`; unweighted models use `UnitWeights`."
+   wts::W
    "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm"
    wrkwt::V
    "`wrkresid`: working residuals for IRLS"
    wrkresid::V
 end

-function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L}
+function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::W) where {V<:FPVector, D, L, W}
    n = length(y)
    nη = length(η)
    nμ = length(μ)
@@ -41,36 +41,34 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec
    end

-   # Lengths of wts and off can be either n or 0
-   if lw != 0 && lw != n
-       throw(DimensionMismatch("wts must have length $n or length 0 but was $lw"))
+   # Length of wts must be n; length of off can be either n or 0
+   if lw != n
+       throw(DimensionMismatch("wts must have length $n but was $lw"))
    end
    if lo != 0 && lo != n
        throw(DimensionMismatch("offset must have length $n or length 0 but was $lo"))
    end

-   return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
+   return GlmResp{V,D,L,W}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
 end

-function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector)
+function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::AbstractWeights{<:Real})
    # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
    _y = convert(Vector{float(eltype(y))}, y)
    _off = convert(Vector{float(eltype(off))}, off)
-   _wts = convert(Vector{float(eltype(wts))}, wts)
    η = similar(_y)
    μ = similar(_y)
-   r = GlmResp(_y, d, l, η, μ, _off, _wts)
-   initialeta!(r.eta, d, l, _y, _wts, _off)
+   r = GlmResp(_y, d, l, η, μ, _off, wts)
+   initialeta!(r.eta, d, l, _y, wts, _off)
    updateμ!(r, r.eta)
    return r
 end

-function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
-                 wts::AbstractVector{<:Real}) where {D, L}
-    GlmResp(float(y), d, l, float(off), float(wts))
+function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
+                 wts::AbstractWeights{<:Real}) where {D, L}
+    GlmResp(float(y), d, l, float(off), wts)
 end

 deviance(r::GlmResp) = sum(r.devresid)

+weights(r::GlmResp) = r.wts
+
 """
    cancancel(r::GlmResp{V,D,L})
@@ -288,7 +286,7 @@ function _fit!(m::AbstractGLM, verbose::Bool, maxiter::Integer, minstepfac::Real
    lp = r.mu

    # Initialize β, μ, and compute deviance
-   if start == nothing || isempty(start)
+   if start === nothing || isempty(start)
        # Compute beta update based on default response value
        # if no starting values have been passed
        delbeta!(p, wrkresp(r), r.wrkwt)
@@ -440,12 +438,9 @@ const FIT_GLM_DOC = """
    # Keyword Arguments
    - `dofit::Bool=true`: Determines whether model will be fit
-   - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
-     Such weights are equivalent to repeating each observation a number of times equal
-     to its weight. Do note that this interpretation gives equal point estimates but
-     different standard errors from analytical (a.k.a. inverse variance) weights and
-     from probability (a.k.a. sampling) weights which are the default in some other
-     software.
+   - `wts::AbstractWeights=uweights(length(y))`: Weights of observations.
+     Allowed weights are `AnalyticWeights`, `FrequencyWeights`, or `ProbabilityWeights`.
+     If a plain vector is passed (deprecated), it is coerced to `FrequencyWeights`.
+     The default `UnitWeights` correspond to an unweighted model.
    - `offset::Vector=similar(y,0)`: offset added to `Xβ` to form `eta`.
      Can be of length 0
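To make the reworked `wts` keyword concrete for reviewers, here is an illustrative sketch of the intended call patterns (hypothetical data; it assumes the StatsBase constructors `aweights`, `fweights`, and `pweights`, whose result types this PR re-exports):

```julia
using GLM, StatsBase

X = randn(100, 2)
y = X * [1.0, -0.5] .+ randn(100)
w = rand(100) .+ 0.5

# Explicit weight types (the new API):
glm(X, y, Normal(), IdentityLink(); wts = aweights(w))  # analytic weights
glm(X, y, Normal(), IdentityLink(); wts = fweights(w))  # frequency weights
glm(X, y, Normal(), IdentityLink(); wts = pweights(w))  # probability weights

# A bare vector still works, but goes through the deprecation path
# and is coerced to FrequencyWeights:
glm(X, y, Normal(), IdentityLink(); wts = w)
```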
@@ -476,17 +471,26 @@ function fit(::Type{M},
    d::UnivariateDistribution,
    l::Link = canonicallink(d);
    dofit::Bool = true,
-   wts::AbstractVector{<:Real} = similar(y, 0),
+   wts::AbstractVector{<:Real} = uweights(length(y)),
    offset::AbstractVector{<:Real} = similar(y, 0),
    fitargs...) where {M<:AbstractGLM}
-
    # Check that X and y have the same number of observations
    if size(X, 1) != size(y, 1)
        throw(DimensionMismatch("number of rows in X and y must match"))
    end
-
-   rr = GlmResp(y, d, l, offset, wts)
-   res = M(rr, cholpred(X), false)
+   # For backward compatibility accept wts as AbstractVector and coerce it to FrequencyWeights
+   _wts = if isa(wts, AbstractWeights)
+       wts
+   elseif isa(wts, AbstractVector)
+       Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " *
+                    "AnalyticWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
+                    "by coercing wts to `FrequencyWeights`", :fit)
+       fweights(wts)
+   else
+       throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
+   end
+   rr = GlmResp(y, d, l, offset, _wts)
+   res = M(rr, cholpred(X, false, _wts), false)
    return dofit ? fit!(res; fitargs...) : res
 end
@@ -649,3 +654,18 @@ function checky(y, d::Binomial)
    end
    return nothing
 end
+
+"""
+    nobs(obj::LinearModel)
+    nobs(obj::GLM)
+
+For linear and generalized linear models, returns the number of rows or,
+when prior weights of type `FrequencyWeights` are specified, the sum of the weights.
+"""
+nobs(obj::LinPredModel) = nobs(obj.rr)
+
+nobs(r::LmResp{V,W}) where {V,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
+nobs(r::LmResp{V,W}) where {V,W<:FrequencyWeights} =
+    isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
+
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W} = oftype(sum(one(eltype(r.wts))), length(r.y))
+nobs(r::GlmResp{V,D,L,W}) where {V,D,L,W<:FrequencyWeights} =
+    isempty(r.wts) ? oftype(sum(one(eltype(r.wts))), length(r.y)) : r.wts.sum
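The new `nobs` methods change what "number of observations" means depending on the weight type. An illustrative sketch (hypothetical data; the return values follow the definitions above and are exercised by the tests further down, e.g. `nobs(gm14) == 400` for `fweights` vs `8` for `aweights`):

```julia
using GLM, StatsBase

X = randn(30, 2)
y = randn(30)
w = fill(2.0, 30)

nobs(lm(X, y))                     # 30   — unweighted (UnitWeights)
nobs(lm(X, y; wts = fweights(w)))  # 60.0 — FrequencyWeights: sum of the weights
nobs(lm(X, y; wts = aweights(w)))  # 30.0 — other weight types: number of rows
```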
diff --git a/src/linpred.jl b/src/linpred.jl
index 4274f575..441f1211 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -45,28 +45,39 @@ A `LinPred` type with a dense, unpivoted QR decomposition of `X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `qr`: a `QRCompactWY` object created from `X`, with optional row weights.
+- `wts`: observation weights (`AbstractWeights`)
+- `wresponse`: scratch vector of length `n` for the weighted response
 """
-mutable struct DensePredQR{T<:BlasReal} <: DensePred
+mutable struct DensePredQR{T<:BlasReal,W<:AbstractWeights{<:Real}} <: DensePred
    X::Matrix{T}                  # model matrix
+   Xw::Matrix{T}                 # weighted model matrix
    beta0::Vector{T}              # base coefficient vector
    delbeta::Vector{T}            # coefficient increment
    scratchbeta::Vector{T}
    qr::QRCompactWY{T}
-   function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}) where T
+   wts::W
+   wresponse::Vector{T}
+   function DensePredQR{T}(X::Matrix{T}, beta0::Vector{T}, wts::W) where {T,W<:AbstractWeights{<:Real}}
        n, p = size(X)
        length(beta0) == p || throw(DimensionMismatch("length(β0) ≠ size(X,2)"))
-       new{T}(X, beta0, zeros(T,p), zeros(T,p), qr(X))
+       (length(wts) == n || isempty(wts)) ||
+           throw(DimensionMismatch("length of weights does not match the number of rows of X"))
+       Xw = wts isa UnitWeights ? Matrix{T}(undef, 0, 0) : sqrt.(wts) .* X
+       qrX = wts isa UnitWeights ? qr(X) : qr(Xw)
+       new{T,W}(X, Xw, beta0, zeros(T,p), zeros(T,p), qrX, wts, similar(X, T, (size(X,1),)))
    end
-   function DensePredQR{T}(X::Matrix{T}) where T
+   function DensePredQR{T}(X::Matrix{T}, wts::W) where {T,W}
        n, p = size(X)
-       new{T}(X, zeros(T, p), zeros(T,p), zeros(T,p), qr(X))
+       DensePredQR(X, zeros(T, p), wts)
+   end
+   function DensePredQR(X::Matrix{T}) where T
+       n, p = size(X)
+       DensePredQR{T}(X, zeros(T, p), uweights(0))
    end
 end
-DensePredQR(X::Matrix, beta0::Vector) = DensePredQR{eltype(X)}(X, beta0)
-DensePredQR(X::Matrix{T}) where T = DensePredQR{T}(X, zeros(T, size(X,2)))
-convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X, zeros(T, size(X, 2)))
+DensePredQR{T}(X::Matrix) where T = DensePredQR{T}(convert(Matrix{T}, X), zeros(T, size(X, 2)), uweights(size(X,1)))
+DensePredQR(X::Matrix, beta0::Vector, wts::AbstractVector) = DensePredQR{eltype(X)}(X, beta0, wts)
+DensePredQR(X::Matrix{T}, wts::AbstractVector) where T = DensePredQR{T}(X, zeros(T, size(X,2)), wts)
+convert(::Type{DensePredQR{T}}, X::Matrix{T}) where {T} = DensePredQR{T}(X)

 """
    delbeta!(p::LinPred, r::Vector)

 Evaluate and return `p.delbeta` the increment to the coefficient vector from residual `r`
 """
@@ -78,7 +89,7 @@ function delbeta!(p::DensePredQR{T}, r::Vector{T}) where T<:BlasReal
 end

 """
    DensePredChol{T}

 A `LinPred` type with a dense Cholesky factorization of `X'X`

@@ -90,31 +101,40 @@ A `LinPred` type with a dense Cholesky factorization of `X'X`
 - `scratchbeta`: scratch vector of length `p`, used in `linpred!` method
 - `chol`: a `Cholesky` object created from `X'X`, possibly using row weights.
 - `scratchm1`: scratch Matrix{T} of the same size as `X`
-- `scratchm2`: scratch Matrix{T} os the same size as `X'X`
+- `scratchm2`: scratch Matrix{T} of the same size as `X'X`
+- `scratchv1`: scratch Vector{T} of the same size as `y`
 """
-mutable struct DensePredChol{T<:BlasReal,C} <: DensePred
+mutable struct DensePredChol{T<:BlasReal,C,W<:AbstractWeights{<:Real}} <: DensePred
    X::Matrix{T}                   # model matrix
+   Xw::Matrix{T}                  # weighted model matrix
    beta0::Vector{T}               # base vector for coefficients
    delbeta::Vector{T}             # coefficient increment
    scratchbeta::Vector{T}
    chol::C
+   wts::W
    scratchm1::Matrix{T}
    scratchm2::Matrix{T}
+   scratchv1::Vector{T}
 end
-function DensePredChol(X::AbstractMatrix, pivot::Bool)
-   F = Hermitian(float(X'X))
+function DensePredChol(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights{<:Real})
+   Xw = isempty(wts) ? Matrix{eltype(X)}(undef, 0, 0) : sqrt.(wts) .* X
+   F = isempty(wts) ? Hermitian(float(X'X)) : Hermitian(float(Xw'Xw))
    T = eltype(F)
    F = pivot ? pivoted_cholesky!(F, tol = -one(T), check = false) : cholesky!(F)
    DensePredChol(Matrix{T}(X),
+                 Matrix{T}(Xw),
                  zeros(T, size(X, 2)),
                  zeros(T, size(X, 2)),
                  zeros(T, size(X, 2)),
                  F,
+                 wts,
                  similar(X, T),
-                 similar(cholfactors(F)))
+                 similar(cholfactors(F)),
+                 similar(X, T, (size(X,1),)))
 end

-cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot)
+cholpred(X::AbstractMatrix, pivot::Bool, wts::AbstractWeights) = DensePredChol(X, pivot, wts)
+cholpred(X::AbstractMatrix, pivot::Bool=false) = DensePredChol(X, pivot, uweights(size(X,1)))

 cholfactors(c::Union{Cholesky,CholeskyPivoted}) = c.factors
 cholesky!(p::DensePredChol{T}) where {T<:FP} = p.chol

@@ -126,12 +146,16 @@ function cholesky(p::DensePredChol{T}) where T<:FP
 end
 cholesky!(p::DensePredQR{T}) where {T<:FP} = Cholesky{T,typeof(p.X)}(p.qr.R, 'U', 0)

-function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:UnitWeights}, r::Vector{T}) where T<:BlasReal
    ldiv!(p.chol, mul!(p.delbeta, transpose(p.X), r))
    p
 end
+
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal
+   p.scratchv1 .= r .* sqrt.(p.wts)
+   ldiv!(p.chol, mul!(p.delbeta, transpose(p.Xw), p.scratchv1))
+   p
+end

-function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:UnitWeights}, r::Vector{T}) where T<:BlasReal
    ch = p.chol
    delbeta = mul!(p.delbeta, adjoint(p.X), r)
    rnk = rank(ch)
@@ -148,59 +172,102 @@ function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}) where T<:
    p
 end

+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}) where T<:BlasReal
+   ch = p.chol
+   p.scratchv1 .= r .* sqrt.(p.wts)
+   delbeta = mul!(p.delbeta, adjoint(p.Xw), p.scratchv1)
+   rnk = rank(ch)
+   if rnk == length(delbeta)
+       ldiv!(ch, delbeta)
+   else
+       permute!(delbeta, ch.p)
+       for k in (rnk+1):length(delbeta)
+           delbeta[k] = -zero(T)
+       end
+       LAPACK.potrs!(ch.uplo, view(ch.factors, 1:rnk, 1:rnk), view(delbeta, 1:rnk))
+       invpermute!(delbeta, ch.p)
+   end
+   p
+end
+
-function delbeta!(p::DensePredChol{T,<:Cholesky}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
-   scr = mul!(p.scratchm1, Diagonal(wt), p.X)
-   cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(scr), p.X), :U))
-   mul!(p.delbeta, transpose(scr), r)
+function delbeta!(p::DensePredChol{T,<:Cholesky,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+   p.scratchm1 .= wt .* p.X
+   cholesky!(Hermitian(mul!(cholfactors(p.chol), transpose(p.scratchm1), p.X), :U))
+   mul!(p.delbeta, transpose(p.scratchm1), r)
    ldiv!(p.chol, p.delbeta)
    p
 end

-function delbeta!(p::DensePredChol{T,<:CholeskyPivoted}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
+function delbeta!(p::DensePredChol{T,<:CholeskyPivoted,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where T<:BlasReal
    cf = cholfactors(p.chol)
    piv = p.chol.p
-   cf .= mul!(p.scratchm2, adjoint(LinearAlgebra.mul!(p.scratchm1, Diagonal(wt), p.X)), p.X)[piv, piv]
+   p.scratchm1 .= wt .* p.X
+   cf .= mul!(p.scratchm2, adjoint(p.scratchm1), p.X)[piv, piv]
    cholesky!(Hermitian(cf, Symbol(p.chol.uplo)))
    ldiv!(p.chol, mul!(p.delbeta, transpose(p.scratchm1), r))
    p
 end
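The weighted `delbeta!` methods rely on the usual square-root-of-weights trick: with `Xw = sqrt.(w) .* X` stored in the predictor, the weighted normal equations reduce to an ordinary Cholesky solve. A self-contained check of that identity (plain `LinearAlgebra`, no PR code):

```julia
using LinearAlgebra

X = randn(50, 3)
r = randn(50)
w = rand(50) .+ 0.1

Xw = sqrt.(w) .* X    # weighted model matrix, as stored in p.Xw
rw = sqrt.(w) .* r    # scaled residual, as stored in p.scratchv1

δ1 = cholesky(Hermitian(Xw'Xw)) \ (Xw'rw)       # what the weighted delbeta! computes
δ2 = (X' * Diagonal(w) * X) \ (X' * (w .* r))   # direct weighted least-squares solve
@assert δ1 ≈ δ2
```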
-mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C} <: GLM.LinPred
+mutable struct SparsePredChol{T,M<:SparseMatrixCSC,C,W<:AbstractWeights{<:Real}} <: GLM.LinPred
    X::M                           # model matrix
+   Xw::M                          # weighted model matrix
    Xt::M                          # X'
    beta0::Vector{T}               # base vector for coefficients
    delbeta::Vector{T}             # coefficient increment
    scratchbeta::Vector{T}
    chol::C
    scratch::M
+   wts::W
 end
-function SparsePredChol(X::SparseMatrixCSC{T}) where T
+
+function SparsePredChol(X::SparseMatrixCSC{T}, wts::AbstractVector) where T
    chol = cholesky(sparse(I, size(X, 2), size(X,2)))
-   return SparsePredChol{eltype(X),typeof(X),typeof(chol)}(X,
+   Xw = isempty(wts) ? similar(X, 0, 0) : sqrt.(wts) .* X
+   return SparsePredChol{eltype(X),typeof(X),typeof(chol),typeof(wts)}(X,
+       Xw,
        X',
        zeros(T, size(X, 2)),
        zeros(T, size(X, 2)),
        zeros(T, size(X, 2)),
        chol,
-       similar(X))
+       similar(X),
+       wts)
 end

-cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X)
+cholpred(X::SparseMatrixCSC, pivot::Bool=false) = SparsePredChol(X, uweights(size(X,1)))
+cholpred(X::SparseMatrixCSC, pivot::Bool, wts::AbstractWeights) = SparsePredChol(X, wts)

-function delbeta!(p::SparsePredChol{T}, r::Vector{T}, wt::Vector{T}) where T
+function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C}
    scr = mul!(p.scratch, Diagonal(wt), p.X)
    XtWX = p.Xt*scr
    c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
 end

+function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}, wt::Vector{T}) where {T,M,C}
+   scr = mul!(p.scratch, Diagonal(wt .* p.wts), p.X)
+   XtWX = p.Xt*scr
+   c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
+   p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+end
+
-function delbeta!(p::SparsePredChol{T}, r::Vector{T}) where T
+function delbeta!(p::SparsePredChol{T,M,C,<:UnitWeights}, r::Vector{T}) where {T,M,C}
    scr = p.scratch = p.X
    XtWX = p.Xt*scr
    c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
    p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
 end

+function delbeta!(p::SparsePredChol{T,M,C,<:AbstractWeights}, r::Vector{T}) where {T,M,C}
+   scr = p.scratch .= p.X .* p.wts
+   XtWX = p.Xt*scr
+   c = p.chol = cholesky(Symmetric{eltype(XtWX),typeof(XtWX)}(XtWX, 'L'))
+   p.delbeta = c \ mul!(p.delbeta, adjoint(scr), r)
+end
+
 LinearAlgebra.cholesky(p::SparsePredChol{T}) where {T} = copy(p.chol)
 LinearAlgebra.cholesky!(p::SparsePredChol{T}) where {T} = p.chol

@@ -219,8 +286,38 @@ function invchol(x::DensePredChol{T,<: CholeskyPivoted}) where T
    ipiv = invperm(ch.p)
    res[ipiv, ipiv]
 end
+
 invchol(x::SparsePredChol) = cholesky!(x) \ Matrix{Float64}(I, size(x.X, 2), size(x.X, 2))
-vcov(x::LinPredModel) = rmul!(invchol(x.pp), dispersion(x, true))
+
+vcov(x::LinPredModel) = rmul!(_covm(x.pp), dispersion(x, true))
+
+_covm(pp::LinPred) = invchol(pp)
+
+function _covm(pp::DensePredChol{T,<:Cholesky,<:ProbabilityWeights}) where {T}
+   wts = pp.wts
+   Z = pp.scratchm1 .= pp.X .* wts
+   XtW2X = Z'Z
+   invXtWX = invchol(pp)
+   invXtWX * XtW2X * invXtWX
+end
+
+function _covm(pp::DensePredChol{T,<:CholeskyPivoted,<:ProbabilityWeights}) where {T}
+   rnk = rank(pp.chol)
+   rnk == length(pp.delbeta) ||
+       throw(ArgumentError("the sandwich covariance for `ProbabilityWeights` is not " *
+                           "implemented for rank-deficient models"))
+   wts = pp.wts
+   Z = pp.scratchm1 .= pp.X .* wts
+   XtW2X = Z'Z
+   invXtWX = invchol(pp)
+   invXtWX * XtW2X * invXtWX
+end
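The `_covm` methods for `ProbabilityWeights` implement a sandwich-type covariance, with `invchol(pp)` as the bread and `X'W²X` as the meat (the dispersion factor is applied afterwards in `vcov`). A minimal standalone sketch of the algebra:

```julia
using LinearAlgebra

X = randn(40, 2)
w = rand(40) .+ 0.5

bread = inv(X' * Diagonal(w) * X)   # corresponds to invchol(pp)
meat  = X' * Diagonal(w .^ 2) * X   # Z'Z with Z = w .* X (held in p.scratchm1)
V = bread * meat * bread            # _covm(pp), before scaling by dispersion
@assert isapprox(V, V')             # symmetric, as a covariance matrix should be
```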
 function cor(x::LinPredModel)
    Σ = vcov(x)
@@ -235,28 +332,24 @@ function show(io::IO, obj::LinPredModel)
 end

 modelframe(obj::LinPredModel) = obj.fr
-modelmatrix(obj::LinPredModel) = obj.pp.X
+
+function modelmatrix(obj::LinPredModel; weighted::Bool=false)
+   if !weighted
+       obj.pp.X
+   elseif !isempty(weights(obj))
+       obj.pp.Xw
+   else
+       throw(ArgumentError("`weighted=true` allowed only for weighted models."))
+   end
+end
+
 response(obj::LinPredModel) = obj.rr.y

 fitted(m::LinPredModel) = m.rr.mu
 predict(mm::LinPredModel) = fitted(mm)
 StatsModels.formula(obj::LinPredModel) = modelframe(obj).formula

-residuals(obj::LinPredModel) = residuals(obj.rr)
-
-"""
-    nobs(obj::LinearModel)
-    nobs(obj::GLM)
-
-For linear and generalized linear models, returns the number of rows, or,
-when prior weights are specified, the sum of weights.
-"""
-function nobs(obj::LinPredModel)
-   if isempty(obj.rr.wts)
-       oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y))
-   else
-       sum(obj.rr.wts)
-   end
-end
+residuals(obj::LinPredModel; kwargs...) = residuals(obj.rr; kwargs...)
+weights(obj::LinPredModel) = weights(obj.rr)

 coef(x::LinPred) = x.beta0
 coef(obj::LinPredModel) = coef(obj.pp)
diff --git a/src/lm.jl b/src/lm.jl
index 079bc893..42a419c5 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -7,46 +7,41 @@ Encapsulates the response for a linear model

 - `mu`: current value of the mean response vector or fitted value
 - `offset`: optional offset added to the linear predictor to form `mu`
-- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations
+- `wts`: optional weights for observations (`AbstractWeights`)
 - `y`: observed response vector

 Either or both `offset` and `wts` may be of length 0
 """
-mutable struct LmResp{V<:FPVector} <: ModResp  # response in a linear model
+mutable struct LmResp{V<:FPVector,W<:AbstractWeights{<:Real}} <: ModResp  # response in a linear model
    mu::V      # mean response
    offset::V  # offset added to linear predictor (may have length 0)
-   wts::V     # prior weights (may have length 0)
+   wts::W     # prior weights (may have length 0)
    y::V       # response
-   function LmResp{V}(mu::V, off::V, wts::V, y::V) where V
+   function LmResp{V,W}(mu::V, off::V, wts::W, y::V) where {V,W}
        n = length(y)
        length(mu) == n || error("mismatched lengths of mu and y")
        ll = length(off)
        ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0")
        ll = length(wts)
        ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0")
-       new{V}(mu, off, wts, y)
+       new{V,W}(mu, off, wts, y)
    end
 end

-function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing)
+function LmResp(y::AbstractVector{<:Real}, wts::AbstractWeights{<:Real})
    # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
    _y = convert(Vector{float(eltype(y))}, y)
-   _wts = if wts === nothing
-       similar(_y, 0)
-   else
-       convert(Vector{float(eltype(wts))}, wts)
-   end
-   return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y)
+   return LmResp{typeof(_y),typeof(wts)}(zero(_y), zero(_y), wts, _y)
 end

-function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector
+function updateμ!(r::LmResp{V,W}, linPr::V) where {V<:FPVector,W}
    n = length(linPr)
    length(r.y) == n || error("length(linPr) is $n, should be $(length(r.y))")
    length(r.offset) == 0 ? copyto!(r.mu, linPr) : broadcast!(+, r.mu, linPr, r.offset)
    deviance(r)
 end

-updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec(linPr)))
+updateμ!(r::LmResp{V,W}, linPr) where {V<:FPVector,W} = updateμ!(r, convert(V, vec(linPr)))

 function deviance(r::LmResp)
    y = r.y
@@ -70,7 +65,25 @@ function loglikelihood(r::LmResp)
    -n/2 * (log(2π * deviance(r)/n) + 1)
 end

-residuals(r::LmResp) = r.y - r.mu
+function nullloglikelihood(r::LmResp)
+   n = isempty(r.wts) ? length(r.y) : sum(r.wts)
+   -n/2 * (log(2π * nulldeviance(r)/n) + 1)
+end
+
+function residuals(r::LmResp; weighted::Bool=false)
+   wts = weights(r)
+   res = r.y - r.mu
+   if !weighted
+       res
+   elseif !isempty(wts)
+       sqrt.(wts) .* res
+   else
+       throw(ArgumentError("`weighted=true` allowed only for weighted models."))
+   end
+end
+
+weights(r::LmResp) = r.wts

 """
    LinearModel

@@ -90,12 +103,8 @@ end
 LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp)

 function StatsBase.fit!(obj::LinearModel)
-   if isempty(obj.rr.wts)
-       delbeta!(obj.pp, obj.rr.y)
-   else
-       delbeta!(obj.pp, obj.rr.y, obj.rr.wts)
-   end
-   installbeta!(obj.pp)
+   delbeta!(obj.pp, obj.rr.y)
+   installbeta!(obj.pp)
    updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y))))
    return obj
 end

@@ -108,12 +117,15 @@ const FIT_LM_DOC = """
    in columns (including if appropriate the intercept), and `y` must be a vector holding
    values of the dependent variable.

-   The keyword argument `wts` can be a `Vector` specifying frequency weights for observations.
-   Such weights are equivalent to repeating each observation a number of times equal
-   to its weight. Do note that this interpretation gives equal point estimates but
-   different standard errors from analytical (a.k.a. inverse variance) weights and
-   from probability (a.k.a. sampling) weights which are the default in some other
-   software.
+   The keyword argument `wts` can be an `AbstractWeights` specifying the weights of observations.
+   The allowed weight types are:
+   - `AnalyticWeights`: describe a non-random relative importance (usually between 0 and 1)
+     for each observation.
+   - `FrequencyWeights`: describe the number of times (or frequency) each observation was observed.
+   - `ProbabilityWeights`: represent the inverse of the sampling probability for each observation,
+     providing a correction mechanism for under- or over-sampling certain population groups.
+   The three types give the same point estimates but different standard errors.
+   If a plain vector is passed (deprecated), it is coerced to `FrequencyWeights`.

    `dropcollinear` controls whether or not `lm` accepts a model matrix which is
    less-than-full rank. If `true` (the default), only the first of each set of
@@ -133,14 +145,25 @@ $FIT_LM_DOC
 """
 function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real},
             allowrankdeficient_dep::Union{Bool,Nothing}=nothing;
-            wts::AbstractVector{<:Real}=similar(y, 0),
+            wts::AbstractVector{<:Real}=uweights(length(y)),
             dropcollinear::Bool=true)
    if allowrankdeficient_dep !== nothing
        @warn "Positional argument `allowrankdeficient` is deprecated, use keyword " *
              "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
        dropcollinear = allowrankdeficient_dep
    end
-   fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear)))
+   # For backward compatibility accept wts as AbstractVector and coerce it to FrequencyWeights
+   _wts = if isa(wts, AbstractWeights)
+       wts
+   elseif isa(wts, AbstractVector)
+       Base.depwarn("Passing weights as vector is deprecated in favor of explicitly using " *
+                    "AnalyticWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
+                    "by coercing wts to `FrequencyWeights`", :fit)
+       fweights(wts)
+   else
+       throw(ArgumentError("`wts` should be an AbstractVector coercible to AbstractWeights"))
+   end
+   fit!(LinearModel(LmResp(y, _wts), cholpred(X, dropcollinear, _wts)))
 end
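To make the documented behavior concrete — identical point estimates, weight-type-dependent standard errors — an illustrative fit on made-up data (the engel-based tests below check exact values):

```julia
using GLM, StatsBase, DataFrames

df = DataFrame(x = randn(100))
df.y = 2 .* df.x .+ randn(100)
w = rand(1:5, 100)

m_f = lm(@formula(y ~ x), df; wts = fweights(w))
m_a = lm(@formula(y ~ x), df; wts = aweights(w))
m_p = lm(@formula(y ~ x), df; wts = pweights(w))

coef(m_f) ≈ coef(m_a) ≈ coef(m_p)   # true  — same point estimates
stderror(m_f) == stderror(m_a)      # false — standard errors differ by weight type
```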
""" function StatsBase.cooksdistance(obj::LinearModel) - u = residuals(obj) - mse = dispersion(obj,true) + wts = weights(obj) + u = residuals(obj; weighted=!isempty(wts)) + mse = GLM.dispersion(obj,true) k = dof(obj)-1 d_res = dof_residual(obj) - X = modelmatrix(obj) - XtX = crossmodelmatrix(obj) + X = modelmatrix(obj; weighted=!isempty(wts)) + XtX = crossmodelmatrix(obj; weighted=!isempty(wts)) k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported.")) - wts = obj.rr.wts - if isempty(wts) - hii = diag(X * inv(XtX) * X') - else - throw(ArgumentError("Weighted models are not currently supported.")) - end + hii = diag(X * inv(XtX) * X') D = @. u^2 * (hii / (1 - hii)^2) / (k*mse) return D end diff --git a/src/scratch.jl b/src/scratch.jl new file mode 100644 index 00000000..4f2612b0 --- /dev/null +++ b/src/scratch.jl @@ -0,0 +1,77 @@ +using GLM +using DataFrames +using Random +using CSV +using StatsBase +using RDatasets +Random.seed!(11) + +y = rand(10) +x = rand(10,2) +wts = rand(10) +df = DataFrame(x, :auto) +df.y = y +df.wts = wts +lm1 = lm(x,y) +lmw = lm(x,y; wts = wts) +lmf = lm(@formula(y~x1+x2-1), df) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = aweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = pweights(wts)) +lmfw = lm(@formula(y~-1+x1+x2), df; wts = fweights(wts)) + +glm(@formula(y~-1+x1+x2), df, Normal, IdentityLink; wts = fweights(wts)) + +cooksdistance(lm1) + + + +df = dataset("quantreg", "engel") +N = nrow(df) +df.weights = repeat(1:5, Int(N/5)) +f = @formula(FoodExp ~ Income) +lm_model = lm(f, df, wts = FrequencyWeights(df.weights)) +glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + +lm_model = lm(f, df, wts = df.weights) +glm_model = glm(f, df, Normal(), wts = df.weights) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968]) +@test isapprox(r2(lm_model), 0.8330258148644486) +@test isapprox(adjr2(lm_model), 0.832788298242634) +@test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; + -0.06772589439264813 6.670664781664879e-5]) +@test isapprox(first(predict(lm_model)), 357.57694841780994) +@test isapprox(loglikelihood(lm_model), -4353.946729075838) +@test isapprox(loglikelihood(glm_model), -4353.946729075838) +@test isapprox(nullloglikelihood(lm_model), -4984.892139711452) +@test isapprox(mean(residuals(lm_model)), -5.412966629787718) + + + +lm_model = lm(f, df, wts = aweights(df.weights)) +glm_model = glm(f, df, Normal(), wts = aweights(df.weights)) +@test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505]) +@test isapprox(coef(glm_model), [154.35104595140706, 
diff --git a/test/runtests.jl b/test/runtests.jl
index 447798b2..b85ee5c8 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -83,12 +83,13 @@ end
 end

 @testset "linear model with weights" begin
+   df = dataset("quantreg", "engel")
    N = nrow(df)
    df.weights = repeat(1:5, Int(N/5))
    f = @formula(FoodExp ~ Income)
-   lm_model = lm(f, df, wts = df.weights)
-   glm_model = glm(f, df, Normal(), wts = df.weights)
+   lm_model = lm(f, df, wts = FrequencyWeights(df.weights))
+   glm_model = glm(f, df, Normal(), wts = FrequencyWeights(df.weights))
    @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
    @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
    @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
@@ -101,6 +102,29 @@ end
    @test isapprox(loglikelihood(glm_model), -4353.946729075838)
    @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
    @test isapprox(mean(residuals(lm_model)), -5.412966629787718)
+
+   lm_model = lm(f, df, wts = df.weights)
+   glm_model = glm(f, df, Normal(), wts = df.weights)
+   @test isa(weights(lm_model), FrequencyWeights)
+   @test isa(weights(glm_model), FrequencyWeights)
+
+   lm_model = lm(f, df, wts = aweights(df.weights))
+   glm_model = glm(f, df, Normal(), wts = aweights(df.weights))
+   @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
+   @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
+   @test isapprox(stderror(lm_model), [16.297055281313032, 0.014186793927918842])
+   @test isapprox(r2(lm_model), 0.8330258148644486)
+   @test isapprox(adjr2(lm_model), 0.8323091874604334)
+   @test isapprox(vcov(lm_model), [265.59401084217296 -0.20434035947652907;
+                                   -0.20434035947652907 0.00020126512195323495])
+   @test isapprox(first(predict(lm_model)), 357.57694841780994)
+   @test isapprox(loglikelihood(lm_model), -4353.946729075838)
+   @test isapprox(loglikelihood(glm_model), -4353.946729075838)
+   @test isapprox(nullloglikelihood(lm_model), -4984.892139711452)
+   @test isapprox(mean(residuals(lm_model)), -5.412966629787718)
 end

 @testset "rankdeficient" begin
@@ -142,6 +166,16 @@ end
    @test isapprox(coef(m2p_dep_pos_kw), coef(m2p))
 end

+@testset "Passing wts (depwarn)" begin
+   df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3], wts = [3,3,3])
+   @test_logs (:warn, "Passing weights as vector is deprecated in favor of explicitly using " *
+                      "AnalyticWeights, ProbabilityWeights, or FrequencyWeights. Proceeding " *
+                      "by coercing wts to `FrequencyWeights`") lm(@formula(y~x), df; wts=df.wts)
Proceeding " * + "by coercing wts to `FrequencyWeights`") glm(@formula(y~x), Normal(), IdentityLink(), df; wts=wts) +end + @testset "saturated linear model" begin df = DataFrame(x=["a", "b", "c"], y=[1, 2, 3]) model = lm(@formula(y ~ x), df) @@ -475,10 +509,10 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], admit = repeat([false, true], inner=[4]), rank = categorical(repeat(1:4, outer=2))) -@testset "Aggregated Binomial LogitLink" begin +@testset "Aggregated Binomial LogitLink (FrequencyWeights)" begin for distr in (Binomial, Bernoulli) gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), - wts=Array(admit_agr.count)) + wts=fweights(admit_agr.count)) @test dof(gm14) == 4 @test nobs(gm14) == 400 @test isapprox(deviance(gm14), 474.9667184280627) @@ -489,8 +523,25 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12], @test isapprox(coef(gm14), [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) end + end +@testset "Aggregated Binomial LogitLink (AnalyticWeights)" begin + for distr in (Binomial, Bernoulli) + gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr(), + wts=aweights(admit_agr.count)) + @test dof(gm14) == 4 + @test nobs(gm14) == 8 + @test isapprox(deviance(gm14), 474.9667184280627) + @test isapprox(loglikelihood(gm14), -237.48335921403134) + @test isapprox(aic(gm14), 482.96671842822883) + @test isapprox(aicc(gm14), 496.3000517613874) + @test isapprox(bic(gm14), 483.28448459477346) + @test isapprox(coef(gm14), + [0.164303051291, -0.7500299832, -1.36469792994, -1.68672866457], atol=1e-5) + end + +end # Logistic regression using aggregated data with proportions of successes and weights admit_agr2 = DataFrame(Any[[61., 151, 121, 67], [33., 54, 28, 12], categorical(1:4)], [:count, :admit, :rank]) @@ -499,7 +550,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count ## The model matrix here is singular so tests like the deviance are just round off error @testset "Binomial LogitLink aggregated" begin gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(), - wts=admit_agr2.count) + wts=fweights(admit_agr2.count)) test_show(gm15) @test dof(gm15) == 4 @test nobs(gm15) == 400 @@ -514,7 +565,7 @@ end # Weighted Gamma example (weights are totally made up) @testset "Gamma InverseLink Weights" begin gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(), - wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts=fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm16) @test dof(gm16) == 3 @test nobs(gm16) == 32.7 @@ -529,7 +580,7 @@ end # Weighted Poisson example (weights are totally made up) @testset "Poisson LogLink Weights" begin gm17 = fit(GeneralizedLinearModel, @formula(Counts ~ Outcome + Treatment), dobson, Poisson(), - wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7]) + wts = fweights([1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])) test_show(gm17) @test dof(gm17) == 5 @test isapprox(deviance(gm17), 17.699857821414266) @@ -686,6 +737,37 @@ end end end +@testset "Sparse LM (weighted)" begin + rng = StableRNG(1) + X = sprand(rng, 1000, 10, 0.01) + β = randn(rng, 10) + y = Bool[rand(rng) < logistic(x) for x in X * β] + wts = rand(1000) + gmsparsev = [fit(LinearModel, X, y; wts=fweights(wts)), + fit(LinearModel, X, sparse(y); wts=fweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=fweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=fweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), 
deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end + + gmsparsev = [fit(LinearModel, X, y; wts=aweights(wts)), + fit(LinearModel, X, sparse(y); wts=aweights(wts)), + fit(LinearModel, Matrix(X), sparse(y); wts=aweights(wts))] + gmdense = fit(LinearModel, Matrix(X), y; wts=aweights(wts)) + + for gmsparse in gmsparsev + @test isapprox(deviance(gmsparse), deviance(gmdense)) + @test isapprox(coef(gmsparse), coef(gmdense)) + @test isapprox(vcov(gmsparse), vcov(gmdense)) + @test isapprox(Matrix(modelmatrix(gmsparse; weighted=true)), modelmatrix(gmdense; weighted=true)) + end +end + @testset "Predict" begin rng = StableRNG(123) X = rand(rng, 10, 2) @@ -1201,14 +1283,14 @@ end glm4 = glm(view(x, :, :), view(y, :), Binomial()) @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4) - glm5 = glm(x, y, Binomial(), wts=w) - glm6 = glm(x, view(y, :), Binomial(), wts=w) - glm7 = glm(view(x, :, :), y, Binomial(), wts=w) - glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=w) - glm9 = glm(x, y, Binomial(), wts=view(w, :)) - glm10 = glm(x, view(y, :), Binomial(), wts=view(w, :)) - glm11 = glm(view(x, :, :), y, Binomial(), wts=view(w, :)) - glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=view(w, :)) + glm5 = glm(x, y, Binomial(), wts=fweights(w)) + glm6 = glm(x, view(y, :), Binomial(), wts=fweights(w)) + glm7 = glm(view(x, :, :), y, Binomial(), wts=fweights(w)) + glm8 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(w)) + glm9 = glm(x, y, Binomial(), wts=fweights(view(w, :))) + glm10 = glm(x, view(y, :), Binomial(), wts=fweights(view(w, :))) + glm11 = glm(view(x, :, :), y, Binomial(), wts=fweights(view(w, :))) + glm12 = glm(view(x, :, :), view(y, :), Binomial(), wts=fweights(view(w, :))) @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) == coef(glm11) == coef(glm12) end
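Finally, a sketch of the end-to-end effect of `ProbabilityWeights`: point estimates match the analytic-weights fit, while `vcov` and `stderror` go through the sandwich path added in `linpred.jl` (illustrative data, not a test fixture):

```julia
using GLM, StatsBase

X = randn(200, 2)
y = X * [0.5, -1.0] .+ randn(200)
w = 1 .+ 4 .* rand(200)

m_a = lm(X, y; wts = aweights(w))
m_p = lm(X, y; wts = pweights(w))

coef(m_a) ≈ coef(m_p)         # true — same point estimates
stderror(m_a), stderror(m_p)  # differ: model-based vs sandwich-type errors
```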