New updates to improve package stability (#20)
* Improve Fine-Gray model simulation.

* Fine-Gray model: increase the frequency of the second (competing) outcome in simulated data.

* Add a new fold-split function that honors a user-supplied foldid for the Fine-Gray model, to avoid discrepancies caused by the different input formats of the Cox and Fine-Gray models.

* Fix a bug in the logistic regression two-step procedure.

* Improve stability of two-stage selection when only one pair can be selected.

* Enable parallel computation for cross-validation.

* Fix bugs in mcv.FLORAL and a.FLORAL.

* Fix a bug in the Fine-Gray model with longitudinal covariates.

* Fix a bug in the finegray method.

* Update README file.

* Update NEWS.

* Increment version number to 0.2.0.9000.
tengfei-emory authored Jan 8, 2024
1 parent d10d714 commit 02277a8
Showing 14 changed files with 474 additions and 194 deletions.
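
Before the file-by-file diff, a quick usage sketch of the headline feature. This is not part of the commit itself: it reuses the `simu()` call from the package's documented examples and assumes the default family is appropriate for the simulated linear-model outcome.

```r
library(FLORAL)

set.seed(23420)
dat <- simu(n = 50, p = 30, model = "linear")

# New in 0.2.0.9000: ncore > 1 runs the ncv cross-validation folds in
# parallel via doParallel/foreach; ncore = 1 keeps the previous serial path.
fit <- FLORAL(x = dat$xcount, y = dat$y, ncv = 5, ncore = 2,
              progress = FALSE, step2 = FALSE, plot = FALSE)
```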
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: FLORAL
Type: Package
Title: Fit Log-Ratio Lasso Regression for Compositional Data
Version: 0.2.0
Version: 0.2.0.9000
Date: 2023-07-04
Authors@R: c(
person("Teng", "Fei", , "[email protected]", role = c("aut", "cre", "cph"),
8 changes: 8 additions & 0 deletions NEWS.md
@@ -1,3 +1,11 @@
# FLORAL (development version)

* Improves stability when fitting Fine-Gray model with longitudinal covariates.

* Enables parallel computation for cross-validation.

* Fixes several bugs as reported in Issues.

# FLORAL 0.2.0

* Including more examples in document compared to CRAN version (0.1.0.9000)
34 changes: 24 additions & 10 deletions R/FLORAL.R
@@ -15,6 +15,7 @@
#' @param a A scalar between 0 and 1: \code{a} is the weight for lasso penalty while \code{1-a} is the weight for ridge penalty.
#' @param mu Value of penalty for the augmented Lagrangian
#' @param ncv Folds of cross-validation. Use \code{NULL} if cross-validation is not wanted.
#' @param ncore Number of cores for parallel computing for cross-validation. Default is 1.
#' @param intercept \code{TRUE} or \code{FALSE}, indicating whether an intercept should be estimated.
#' @param foldid A vector of fold indicator. Default is \code{NULL}.
#' @param step2 \code{TRUE} or \code{FALSE}, indicating whether a second-stage feature selection for specific ratios should be performed for the features selected by the main lasso algorithm. Will only be performed if cross validation is enabled.
@@ -45,7 +46,7 @@
#' # fit <- FLORAL(dat$xcount,survival::Surv(dat$t,dat$d,type="mstate"),failcode=1,
#' # family="finegray",progress=FALSE,step2=FALSE)
#'
#' @import Rcpp ggplot2 survival glmnet dplyr
#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach doRNG parallel
#' @importFrom survcomp concordance.index
#' @importFrom reshape melt
#' @importFrom utils combn
@@ -69,6 +70,7 @@ FLORAL <- function(x,
a=1,
mu=1,
ncv=5,
ncore=1,
intercept=FALSE,
foldid=NULL,
step2=TRUE,
@@ -103,7 +105,8 @@
foldid,
step2,
progress,
plot)
plot,
ncore=ncore)
}

}else if(family == "binomial"){
@@ -123,7 +126,8 @@
foldid,
step2,
progress,
plot)
plot,
ncore=ncore)
}

}else if(family == "cox"){
@@ -165,7 +169,8 @@
foldid,
step2,
progress,
plot)
plot,
ncore=ncore)

}

@@ -183,7 +188,8 @@
foldid,
step2,
progress,
plot)
plot,
ncore=ncore)

}
}else if (family == "finegray"){
@@ -210,9 +216,10 @@

if (is.null(failcode)){
warning("`failcode` is `NULL`. Using the first failure type as default")
df_FG <- finegray(Surv(tstart, tstop, dd) ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
failcode = 1
df_FG <- finegray(Surv(tstart, tstop, dd, type="mstate") ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
}else{
df_FG <- finegray(Surv(tstart, tstop, dd) ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
df_FG <- finegray(Surv(tstart, tstop, dd, type="mstate") ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
}

newx <- as.matrix(df_FG[,colnames(x)])
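
The `type="mstate"` addition above is the substance of the finegray fix: `finegray()` needs the status variable flagged as multi-state to tell the competing events apart. A toy sketch with hypothetical data, mirroring the call in the diff:

```r
library(survival)

# Toy competing-risks data: dd = 0 (censored), 1 (event of interest), 2 (competing)
toy <- data.frame(id = 1:6,
                  tstart = rep(0, 6),
                  tstop  = c(2, 3, 4, 5, 6, 7),
                  dd     = c(0, 1, 2, 1, 0, 2),
                  x1     = rnorm(6))

# With type = "mstate", dd is treated as a multi-state status, and finegray()
# builds the weighted (fgstart, fgstop, fgstatus, fgwt) data set for the
# failure type given by etype.
df_FG <- finegray(Surv(tstart, tstop, dd, type = "mstate") ~ ., id = id,
                  data = toy, etype = 1, timefix = FALSE)
```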
@@ -234,7 +241,8 @@
foldid,
step2,
progress,
plot)
plot,
ncore=ncore)

}

@@ -270,7 +278,8 @@
foldid,
step2,
progress,
plot)
plot,
ncore=ncore)

}

@@ -378,7 +387,7 @@
#' dat <- simu(n=50,p=30,model="linear")
#' fit <- mcv.FLORAL(mcv=2,ncore=1,x=dat$xcount,y=dat$y,ncv=2,progress=FALSE,step2=TRUE,plot=FALSE)
#'
#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach doRNG parallel
#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach parallel
#' @importFrom survcomp concordance.index
#' @importFrom reshape melt
#' @importFrom utils combn
@@ -444,6 +453,7 @@ mcv.FLORAL <- function(mcv=10,
a,
mu,
ncv,
ncore=1,
intercept,
foldid=NULL,
step2,
@@ -499,6 +509,7 @@
a,
mu,
ncv,
ncore=1,
intercept,
foldid=NULL,
step2,
@@ -715,6 +726,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1),
a = a[i],
mu,
ncv,
ncore=1,
intercept,
foldid=foldid,
step2,
@@ -769,6 +781,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1),
a = a[1],
mu,
ncv,
ncore=1,
intercept,
foldid=NULL,
step2,
@@ -796,6 +809,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1),
a = a[i],
mu,
ncv,
ncore=1,
intercept,
foldid=fit0$foldid,
step2,
141 changes: 96 additions & 45 deletions R/LogRatioCoxLasso.R
@@ -13,7 +13,8 @@ LogRatioCoxLasso <- function(x,
plot=TRUE,
mcv="Deviance",
loop1=FALSE,
loop2=FALSE){
loop2=FALSE,
ncore=1){

ptm <- proc.time()

@@ -66,7 +67,7 @@
if (!is.null(ncv)){

cvdev <- matrix(0,nrow=length(lidx),ncol=ncv)
cvdevnull <- rep(0,ncol=ncv)
# cvdevnull <- rep(0,ncol=ncv)

if (is.null(foldid)){
labels <- coxsplity(as.matrix(y),ncv)
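
`coxsplity` (the fold splitter referenced here, related to the commit note on fold splitting) is not shown in this diff. Purely as a hypothetical sketch of the assumed behavior — spreading events and censored observations evenly across folds so no fold is starved of events:

```r
# Hypothetical sketch only; not the package's actual coxsplity() implementation.
coxsplity_sketch <- function(d, ncv) {
  labels <- integer(length(d))
  labels[d == 1] <- sample(rep_len(1:ncv, sum(d == 1)))  # events
  labels[d == 0] <- sample(rep_len(1:ncv, sum(d == 0)))  # censored
  labels
}
```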
@@ -76,66 +77,112 @@

# labels <- caret::createFolds(factor(d),k=ncv)

for (cv in 1:ncv){
if (ncore == 1){

if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))

# train.x <- x[-labels[[cv]],]
# train.d <- d[-labels[[cv]]]
# train.t <- t[-labels[[cv]]]
# test.x <- x[labels[[cv]],]
# test.d <- d[labels[[cv]]]
# test.t <- t[labels[[cv]]]

train.x <- x[labels!=cv,]
train.d <- d[labels!=cv]
train.t <- t[labels!=cv]
test.x <- x[labels==cv,]
test.d <- d[labels==cv]
test.t <- t[labels==cv]

cv.devnull <- 0
train.tj <- sort(train.t[train.d==1])
for (j in 1:length(train.tj)){
cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j]))
for (cv in 1:ncv){

if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))

# train.x <- x[-labels[[cv]],]
# train.d <- d[-labels[[cv]]]
# train.t <- t[-labels[[cv]]]
# test.x <- x[labels[[cv]],]
# test.d <- d[labels[[cv]]]
# test.t <- t[labels[[cv]]]

train.x <- x[labels!=cv,]
train.d <- d[labels!=cv]
train.t <- t[labels!=cv]
test.x <- x[labels==cv,]
test.d <- d[labels==cv]
test.t <- t[labels==cv]

cv.devnull <- 0
train.tj <- sort(train.t[train.d==1])
for (j in 1:length(train.tj)){
cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j]))
}
cv.devnull <- 2*cv.devnull

cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress,loop1,loop2,notcv=FALSE)

cv.devnull <- 0
loglik <- rep(0,length(lidx))
linprod <- test.x %*% cvfit$beta

if (mcv == "Deviance"){

test.tj <- sort(test.t[test.d==1])
for (j in 1:length(test.tj)){
cv.devnull <- cv.devnull + log(sum(test.t >= test.tj[j]))
if (sum(test.t >= test.tj[j]) > 1){
cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
log(colSums(exp(linprod[test.t >= test.tj[j],])) + 1e-8)
}else if (sum(test.t >= test.tj[j]) == 1){
cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
log(exp(linprod[test.t >= test.tj[j],]) + 1e-8)
}
#- log(accu(link(widx))+1e-8)
}

# cvdevnull[cv] <- 2*cv.devnull
cvdev[,cv] <- -2*cvdev[,cv]

}else if (mcv == "Cindex"){

for (kk in 1:length(lidx)){
cvdev[kk,cv] <- 1-concordance.index(x=linprod[,kk],surv.time=test.t,surv.event=test.d)$c.index
}

}
# cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))

}
cv.devnull <- 2*cv.devnull

cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress,loop1,loop2,notcv=FALSE)
}else if (ncore > 1){

cv.devnull <- 0
loglik <- rep(0,length(lidx))
linprod <- test.x %*% cvfit$beta
if (progress) warning(paste0("Using ", ncore ," core for cross-validation computation."))

if (mcv == "Deviance"){
cl <- makeCluster(ncore)
registerDoParallel(cl)

cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% {

train.x <- x[labels!=cv,]
train.d <- d[labels!=cv]
train.t <- t[labels!=cv]
test.x <- x[labels==cv,]
test.d <- d[labels==cv]
test.t <- t[labels==cv]

cv.devnull <- 0
train.tj <- sort(train.t[train.d==1])
for (j in 1:length(train.tj)){
cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j]))
}
cv.devnull <- 2*cv.devnull

cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE,loop1,loop2,notcv=FALSE)

linprod <- test.x %*% cvfit$beta
cv.dev <- rep(0,length(lidx))

test.tj <- sort(test.t[test.d==1])
for (j in 1:length(test.tj)){
cv.devnull <- cv.devnull + log(sum(test.t >= test.tj[j]))
if (sum(test.t >= test.tj[j]) > 1){
cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
cv.dev <- cv.dev + linprod[test.t == test.tj[j],] -
log(colSums(exp(linprod[test.t >= test.tj[j],])) + 1e-8)
}else if (sum(test.t >= test.tj[j]) == 1){
cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
cv.dev <- cv.dev + linprod[test.t == test.tj[j],] -
log(exp(linprod[test.t >= test.tj[j],]) + 1e-8)
}
#- log(accu(link(widx))+1e-8)
}

cvdevnull[cv] <- 2*cv.devnull
cvdev[,cv] <- -2*cvdev[,cv]

}else if (mcv == "Cindex"){

for (kk in 1:length(lidx)){
cvdev[kk,cv] <- 1-concordance.index(x=linprod[,kk],surv.time=test.t,surv.event=test.d)$c.index
}

cv.dev <- -2*cv.dev
cv.dev
}



# cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
stopCluster(cl)

}
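
The `ncore > 1` branch above follows the standard doParallel/foreach recipe: register a cluster, let `%dopar%` dispatch the folds, and `cbind` the per-fold results into the `cvdev` matrix. A self-contained skeleton of that pattern, with placeholder work in place of the real fold fit:

```r
library(doParallel)

ncv <- 5
cl <- makeCluster(2)   # e.g. ncore = 2
registerDoParallel(cl)

# Each iteration returns one per-fold vector; .combine = cbind assembles
# a matrix with one column per fold, matching how cvdev is used above.
cvdev <- foreach(cv = 1:ncv, .combine = cbind) %dopar% {
  rnorm(10)            # placeholder for the per-fold deviance vector
}

stopCluster(cl)
dim(cvdev)             # 10 x 5
```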

@@ -230,6 +277,8 @@ LogRatioCoxLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t,d),type.measure = "deviance",family="cox")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
}else{
idxs <- as.vector(idxs)
}

df_step2 <- data.frame(t=t,d=d,x=x.select.min)
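
This new `else` branch is the single-pair stability fix noted in the commit message: `cv.glmnet` cannot refit on a single column, so when exactly one log-ratio pair survives step 2, the pair indices are flattened to a vector instead of being pruned further. A sketch with hypothetical values (the guard condition sits above this hunk and is assumed here):

```r
# Hypothetical: columns of idxs index the selected taxa pairs.
idxs <- matrix(c(3L, 7L), nrow = 2)  # exactly one pair selected (taxa 3 and 7)

if (ncol(idxs) > 1) {
  # several pairs: refit with cv.glmnet and keep the nonzero columns (as above)
} else {
  idxs <- as.vector(idxs)            # single pair: skip the refit, keep c(3L, 7L)
}
```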
@@ -259,6 +308,8 @@
stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t,d),type.measure = "deviance",family="cox")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
}else{
idxs <- as.vector(idxs)
}

df_step2 <- data.frame(t=t,d=d,x=x.select.min)