From 2d969059c5b513f1ac432e9d089fafdeaaa35b86 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Sat, 9 Jul 2022 17:13:39 -0700 Subject: [PATCH 1/3] Store Cholesky of the OIM rather than separate matrices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we're storing both the (observed) Fisher information matrix and its inverse, the variance-covariance matrix, separately in the `CoxModel` object. Since we know the observed information is positive semi-definite, we can instead store a (possibly pivoted) Cholesky factorization of the information. That allows us to easily construct both the original matrix and its inverse without needing to store both. In the case of few parameters (like ≤3) and positive semi-definite information but not positive definite, this will actually use a bit more memory than storing the matrices separately. However, for any more parameters, regardless of whether the information is positive definite, this saves a good amount of memory, with the difference becoming more pronounced with more parameters. --- src/cox.jl | 28 ++++++++++++++++++++++------ test/runtests.jl | 7 ++++--- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/cox.jl b/src/cox.jl index 44ffcbf..61bfeab 100644 --- a/src/cox.jl +++ b/src/cox.jl @@ -78,13 +78,12 @@ end # Structure of Cox regression output -struct CoxModel{T<:Real} <: RegressionModel +struct CoxModel{T<:Real,C<:Union{Cholesky{T},CholeskyPivoted{T}}} <: RegressionModel aux::CoxAux{T} β::Vector{T} loglik::T score::Vector{T} - fischer_info::Matrix{T} - vcov::Matrix{T} + chol::C end function StatsAPI.coeftable(obj::CoxModel) @@ -119,7 +118,7 @@ StatsAPI.dof(obj::CoxModel) = length(coef(obj)) StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj) -StatsAPI.vcov(obj::CoxModel) = obj.vcov +StatsAPI.vcov(obj::CoxModel) = inv(obj.chol) StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj))) @@ -193,6 +192,19 @@ function _cox_fgh!(β, grad, hes, c::CoxAux{T}) where T return y end +function _chol_me_maybe(A) + C = cholesky(A; check=false) + if issuccess(C) + return C + else + @static if VERSION >= v"1.8.0-DEV.1139" + return cholesky(A, RowMaximum()) + else + return cholesky(A, Val(true)) + end + end +end + _coxph(X::AbstractArray{<:Integer}, s::AbstractVector; tol, l2_cost) = _coxph(float(X), s; tol=tol, l2_cost=l2_cost) function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T @@ -201,8 +213,12 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T β₀ = zeros(R, size(X, 2)) fgh! = TwiceDifferentiable(Optim.only_fgh!((f, G, H, x)->_cox_fgh!(x, G, H, c)), β₀) res = optimize(fgh!, β₀, NewtonTrustRegion(), Optim.Options(g_tol = tol)) - β, neg_ll, grad, hes = Optim.minimizer(res), Optim.minimum(res), Optim.gradient(fgh!), Optim.hessian(fgh!) - return CoxModel{R}(c, β, -neg_ll, -grad, hes, pinv(hes)) + β = Optim.minimizer(res) + neg_ll = minimum(res) + grad = Optim.gradient(fgh!) + hes = Symmetric(Optim.hessian(fgh!)) + chol = _chol_me_maybe(hes) + return CoxModel{R,typeof(chol)}(c, β, -neg_ll, -grad, chol) end StatsModels.drop_intercept(::Type{CoxModel}) = true diff --git a/test/runtests.jl b/test/runtests.jl index 94e2e46..5ff619d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -201,7 +201,7 @@ end @test modelmatrix(outcome) == modelmatrix(outcome_without_formula) @test sprint(show, outcome_without_formula) == """ -CoxModel{Float64} +CoxModel{Float64, Cholesky{Float64, Matrix{Float64}}} Coefficients: ────────────────────────────────────────────── @@ -241,8 +241,9 @@ x7 0.0914971 0.0286485 3.19378 0.0014 @test dof(outcome) == 7 @test dof_residual(outcome) == 425 @test loglikelihood(outcome) > nullloglikelihood(outcome) - @test all(x->x > 0, eigen(outcome.model.fischer_info).values) - @test outcome.model.fischer_info * vcov(outcome) ≈ I atol=1e-10 + fisher_info = Matrix(outcome.model.chol) + @test all(x->x > 0, eigen(fisher_info).values) + @test fisher_info * vcov(outcome) ≈ I atol=1e-10 @test norm(outcome.model.score) < 1e-5 @test hcat(outcome_coefmat.cols[1:3]...) ≈ expected_coefs[:,1:3] atol=1e-5 From d2f0563091600ef82afc38ff561aabe59965d882 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Tue, 26 Jul 2022 15:24:16 -0700 Subject: [PATCH 2/3] Check for collinearity when fitting a Cox model --- src/cox.jl | 3 +++ test/runtests.jl | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/cox.jl b/src/cox.jl index 178000c..0b56c3d 100644 --- a/src/cox.jl +++ b/src/cox.jl @@ -240,6 +240,9 @@ Cox proportional hazard model estimate of coefficients. Returns a `CoxModel` object. """ function StatsAPI.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; tol=1e-4, l2_cost=0) + if rank(M) < min(size(M)...) + throw(ArgumentError("model matrix is not full rank; some terms may be collinear")) + end index_perm = sortperm(y) X = M[index_perm,:] s = y[index_perm] diff --git a/test/runtests.jl b/test/runtests.jl index f1284ef..307a2be 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -280,6 +280,10 @@ x7 0.0914971 0.0286485 3.19378 0.0014 outcome_fincatracecat = coxph(@formula(event ~ fin * race), rossi; tol=1e-8) @test coeftable(outcome_fincatracecat).rownms == ["fin: 1", "race: 1","fin: 1 & race: 1"] @test coef(outcome_fincatracecat) ≈ coef(outcome_finrace) atol=1e-8 + + transform!(rossi, :age => ByRow(age -> 2 * age) => :age_times_two) + @test_throws ArgumentError fit(CoxModel, @formula(event ~ fin + age + age_times_two), + rossi; tol=1e-8) end @testset "EventTable" begin From e2810dbc719b6d1305ba020f11dd2c635ffb2864 Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Tue, 26 Jul 2022 15:42:04 -0700 Subject: [PATCH 3/3] Store the covariance matrix only, not the information And store it as a matrix rather than a decomposition --- src/cox.jl | 26 +++++++------------------- test/runtests.jl | 6 ++---- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/cox.jl b/src/cox.jl index 0b56c3d..e9ac1cc 100644 --- a/src/cox.jl +++ b/src/cox.jl @@ -78,12 +78,12 @@ end # Structure of Cox regression output -struct CoxModel{T<:Real,C<:Union{Cholesky{T},CholeskyPivoted{T}}} <: RegressionModel +struct CoxModel{T<:Real} <: RegressionModel aux::CoxAux{T} β::Vector{T} loglik::T score::Vector{T} - chol::C + vcov::Symmetric{T,Matrix{T}} end function StatsAPI.coeftable(obj::CoxModel) @@ -118,7 +118,7 @@ StatsAPI.dof(obj::CoxModel) = length(coef(obj)) StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj) -StatsAPI.vcov(obj::CoxModel) = inv(obj.chol) +StatsAPI.vcov(obj::CoxModel) = obj.vcov StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj))) @@ -201,19 +201,6 @@ function _cox_fgh!(β, grad, hes, c::CoxAux{T}) where T return y end -function _chol_me_maybe(A) - C = cholesky(A; check=false) - if issuccess(C) - return C - else - @static if VERSION >= v"1.8.0-DEV.1139" - return cholesky(A, RowMaximum()) - else - return cholesky(A, Val(true)) - end - end -end - _coxph(X::AbstractArray{<:Integer}, s::AbstractVector; tol, l2_cost) = _coxph(float(X), s; tol=tol, l2_cost=l2_cost) function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T @@ -225,9 +212,10 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T β = Optim.minimizer(res) neg_ll = minimum(res) grad = Optim.gradient(fgh!) - hes = Symmetric(Optim.hessian(fgh!)) - chol = _chol_me_maybe(hes) - return CoxModel{R,typeof(chol)}(c, β, -neg_ll, -grad, chol) + hes = Optim.hessian(fgh!) + chol = cholesky!(Symmetric(hes)) + vcov = Symmetric(LAPACK.potri!(chol.uplo, chol.factors), Symbol(chol.uplo)) + return CoxModel{R}(c, β, -neg_ll, -grad, vcov) end StatsModels.drop_intercept(::Type{CoxModel}) = true diff --git a/test/runtests.jl b/test/runtests.jl index 307a2be..f0f2894 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -205,7 +205,7 @@ end @test modelmatrix(outcome) == modelmatrix(outcome_without_formula) @test sprint(show, outcome_without_formula) == """ -CoxModel{Float64, Cholesky{Float64, Matrix{Float64}}} +CoxModel{Float64} Coefficients: ────────────────────────────────────────────── @@ -258,9 +258,7 @@ x7 0.0914971 0.0286485 3.19378 0.0014 @test dof(outcome) == 7 @test dof_residual(outcome) == 425 @test loglikelihood(outcome) > nullloglikelihood(outcome) - fisher_info = Matrix(outcome.model.chol) - @test all(x->x > 0, eigen(fisher_info).values) - @test fisher_info * vcov(outcome) ≈ I atol=1e-10 + @test isposdef(vcov(outcome)) @test norm(outcome.model.score) < 1e-5 @test hcat(outcome_coefmat.cols[1:3]...) ≈ expected_coefs[:,1:3] atol=1e-5 @test confint(outcome_from_matrix) ≈ expected_wald_intervals atol=1e-6