From 2d969059c5b513f1ac432e9d089fafdeaaa35b86 Mon Sep 17 00:00:00 2001
From: Alex Arslan <ararslan@comcast.net>
Date: Sat, 9 Jul 2022 17:13:39 -0700
Subject: [PATCH 1/3] Store Cholesky of the OIM rather than separate matrices
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we're storing both the (observed) Fisher information matrix
and its inverse, the variance-covariance matrix, separately in the
`CoxModel` object. Since we know the observed information is positive
semi-definite, we can instead store a (possibly pivoted) Cholesky
factorization of the information. That allows us to easily construct
both the original matrix and its inverse without needing to store both.

In the case of few parameters (like ≤3) and positive semi-definite
information but not positive definite, this will actually use a bit more
memory than storing the matrices separately. However, for any more
parameters, regardless of whether the information is positive definite,
this saves a good amount of memory, with the difference becoming more
pronounced with more parameters.
---
 src/cox.jl       | 28 ++++++++++++++++++++++------
 test/runtests.jl |  7 ++++---
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/cox.jl b/src/cox.jl
index 44ffcbf..61bfeab 100644
--- a/src/cox.jl
+++ b/src/cox.jl
@@ -78,13 +78,12 @@ end
 
 # Structure of Cox regression output
 
-struct CoxModel{T<:Real} <: RegressionModel
+struct CoxModel{T<:Real,C<:Union{Cholesky{T},CholeskyPivoted{T}}} <: RegressionModel
     aux::CoxAux{T}
     β::Vector{T}
     loglik::T
     score::Vector{T}
-    fischer_info::Matrix{T}
-    vcov::Matrix{T}
+    chol::C
 end
 
 function StatsAPI.coeftable(obj::CoxModel)
@@ -119,7 +118,7 @@ StatsAPI.dof(obj::CoxModel) = length(coef(obj))
 
 StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj)
 
-StatsAPI.vcov(obj::CoxModel) = obj.vcov
+StatsAPI.vcov(obj::CoxModel) = inv(obj.chol)
 
 StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj)))
 
@@ -193,6 +192,19 @@ function _cox_fgh!(β, grad, hes, c::CoxAux{T}) where T
     return y
 end
 
+function _chol_me_maybe(A)
+    C = cholesky(A; check=false)
+    if issuccess(C)
+        return C
+    else
+        @static if VERSION >= v"1.8.0-DEV.1139"
+            return cholesky(A, RowMaximum())
+        else
+            return cholesky(A, Val(true))
+        end
+    end
+end
+
 _coxph(X::AbstractArray{<:Integer}, s::AbstractVector; tol, l2_cost) = _coxph(float(X), s; tol=tol, l2_cost=l2_cost)
 
 function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T
@@ -201,8 +213,12 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T
     β₀ = zeros(R, size(X, 2))
     fgh! = TwiceDifferentiable(Optim.only_fgh!((f, G, H, x)->_cox_fgh!(x, G, H, c)), β₀)
     res = optimize(fgh!, β₀, NewtonTrustRegion(), Optim.Options(g_tol = tol))
-    β, neg_ll, grad, hes = Optim.minimizer(res), Optim.minimum(res), Optim.gradient(fgh!), Optim.hessian(fgh!)
-    return CoxModel{R}(c, β, -neg_ll, -grad, hes, pinv(hes))
+    β = Optim.minimizer(res)
+    neg_ll = minimum(res)
+    grad = Optim.gradient(fgh!)
+    hes = Symmetric(Optim.hessian(fgh!))
+    chol = _chol_me_maybe(hes)
+    return CoxModel{R,typeof(chol)}(c, β, -neg_ll, -grad, chol)
 end
 
 StatsModels.drop_intercept(::Type{CoxModel}) = true
diff --git a/test/runtests.jl b/test/runtests.jl
index 94e2e46..5ff619d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -201,7 +201,7 @@ end
     @test modelmatrix(outcome) == modelmatrix(outcome_without_formula)
 
     @test sprint(show, outcome_without_formula) == """
-CoxModel{Float64}
+CoxModel{Float64, Cholesky{Float64, Matrix{Float64}}}
 
 Coefficients:
 ──────────────────────────────────────────────
@@ -241,8 +241,9 @@ x7   0.0914971  0.0286485   3.19378     0.0014
     @test dof(outcome) == 7
     @test dof_residual(outcome) == 425
     @test loglikelihood(outcome) > nullloglikelihood(outcome)
-    @test all(x->x > 0, eigen(outcome.model.fischer_info).values)
-    @test outcome.model.fischer_info * vcov(outcome) ≈ I atol=1e-10
+    fisher_info = Matrix(outcome.model.chol)
+    @test all(x->x > 0, eigen(fisher_info).values)
+    @test fisher_info * vcov(outcome) ≈ I atol=1e-10
     @test norm(outcome.model.score) < 1e-5
     @test hcat(outcome_coefmat.cols[1:3]...) ≈ expected_coefs[:,1:3] atol=1e-5
 

From d2f0563091600ef82afc38ff561aabe59965d882 Mon Sep 17 00:00:00 2001
From: Alex Arslan <ararslan@comcast.net>
Date: Tue, 26 Jul 2022 15:24:16 -0700
Subject: [PATCH 2/3] Check for collinearity when fitting a Cox model

---
 src/cox.jl       | 3 +++
 test/runtests.jl | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/src/cox.jl b/src/cox.jl
index 178000c..0b56c3d 100644
--- a/src/cox.jl
+++ b/src/cox.jl
@@ -240,6 +240,9 @@ Cox proportional hazard model estimate of coefficients. Returns a `CoxModel`
 object.
 """
 function StatsAPI.fit(::Type{CoxModel}, M::AbstractMatrix, y::AbstractVector; tol=1e-4, l2_cost=0)
+    if rank(M) < min(size(M)...)
+        throw(ArgumentError("model matrix is not full rank; some terms may be collinear"))
+    end
     index_perm = sortperm(y)
     X = M[index_perm,:]
     s = y[index_perm]
diff --git a/test/runtests.jl b/test/runtests.jl
index f1284ef..307a2be 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -280,6 +280,10 @@ x7   0.0914971  0.0286485   3.19378     0.0014
     outcome_fincatracecat = coxph(@formula(event ~ fin * race), rossi; tol=1e-8)
     @test coeftable(outcome_fincatracecat).rownms == ["fin: 1", "race: 1","fin: 1 & race: 1"]
     @test coef(outcome_fincatracecat) ≈ coef(outcome_finrace) atol=1e-8
+
+    transform!(rossi, :age => ByRow(age -> 2 * age) => :age_times_two)
+    @test_throws ArgumentError fit(CoxModel, @formula(event ~ fin + age + age_times_two),
+                                   rossi; tol=1e-8)
 end
 
 @testset "EventTable" begin

From e2810dbc719b6d1305ba020f11dd2c635ffb2864 Mon Sep 17 00:00:00 2001
From: Alex Arslan <ararslan@comcast.net>
Date: Tue, 26 Jul 2022 15:42:04 -0700
Subject: [PATCH 3/3] Store the covariance matrix only, not the information

And store it as a matrix rather than a decomposition
---
 src/cox.jl       | 26 +++++++-------------------
 test/runtests.jl |  6 ++----
 2 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/src/cox.jl b/src/cox.jl
index 0b56c3d..e9ac1cc 100644
--- a/src/cox.jl
+++ b/src/cox.jl
@@ -78,12 +78,12 @@ end
 
 # Structure of Cox regression output
 
-struct CoxModel{T<:Real,C<:Union{Cholesky{T},CholeskyPivoted{T}}} <: RegressionModel
+struct CoxModel{T<:Real} <: RegressionModel
     aux::CoxAux{T}
     β::Vector{T}
     loglik::T
     score::Vector{T}
-    chol::C
+    vcov::Symmetric{T,Matrix{T}}
 end
 
 function StatsAPI.coeftable(obj::CoxModel)
@@ -118,7 +118,7 @@ StatsAPI.dof(obj::CoxModel) = length(coef(obj))
 
 StatsAPI.dof_residual(obj::CoxModel) = nobs(obj) - dof(obj)
 
-StatsAPI.vcov(obj::CoxModel) = inv(obj.chol)
+StatsAPI.vcov(obj::CoxModel) = obj.vcov
 
 StatsAPI.stderror(obj::CoxModel) = sqrt.(diag(vcov(obj)))
 
@@ -201,19 +201,6 @@ function _cox_fgh!(β, grad, hes, c::CoxAux{T}) where T
     return y
 end
 
-function _chol_me_maybe(A)
-    C = cholesky(A; check=false)
-    if issuccess(C)
-        return C
-    else
-        @static if VERSION >= v"1.8.0-DEV.1139"
-            return cholesky(A, RowMaximum())
-        else
-            return cholesky(A, Val(true))
-        end
-    end
-end
-
 _coxph(X::AbstractArray{<:Integer}, s::AbstractVector; tol, l2_cost) = _coxph(float(X), s; tol=tol, l2_cost=l2_cost)
 
 function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T
@@ -225,9 +212,10 @@ function _coxph(X::AbstractArray{T}, s::AbstractVector; l2_cost, tol) where T
     β = Optim.minimizer(res)
     neg_ll = minimum(res)
     grad = Optim.gradient(fgh!)
-    hes = Symmetric(Optim.hessian(fgh!))
-    chol = _chol_me_maybe(hes)
-    return CoxModel{R,typeof(chol)}(c, β, -neg_ll, -grad, chol)
+    hes = Optim.hessian(fgh!)
+    chol = cholesky!(Symmetric(hes))
+    vcov = Symmetric(LAPACK.potri!(chol.uplo, chol.factors), Symbol(chol.uplo))
+    return CoxModel{R}(c, β, -neg_ll, -grad, vcov)
 end
 
 StatsModels.drop_intercept(::Type{CoxModel}) = true
diff --git a/test/runtests.jl b/test/runtests.jl
index 307a2be..f0f2894 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -205,7 +205,7 @@ end
     @test modelmatrix(outcome) == modelmatrix(outcome_without_formula)
 
     @test sprint(show, outcome_without_formula) == """
-CoxModel{Float64, Cholesky{Float64, Matrix{Float64}}}
+CoxModel{Float64}
 
 Coefficients:
 ──────────────────────────────────────────────
@@ -258,9 +258,7 @@ x7   0.0914971  0.0286485   3.19378     0.0014
     @test dof(outcome) == 7
     @test dof_residual(outcome) == 425
     @test loglikelihood(outcome) > nullloglikelihood(outcome)
-    fisher_info = Matrix(outcome.model.chol)
-    @test all(x->x > 0, eigen(fisher_info).values)
-    @test fisher_info * vcov(outcome) ≈ I atol=1e-10
+    @test isposdef(vcov(outcome))
     @test norm(outcome.model.score) < 1e-5
     @test hcat(outcome_coefmat.cols[1:3]...) ≈ expected_coefs[:,1:3] atol=1e-5
     @test confint(outcome_from_matrix) ≈ expected_wald_intervals atol=1e-6