diff --git a/DESCRIPTION b/DESCRIPTION
index d3c61b5..4d197dc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: FLORAL
Type: Package
Title: Fit Log-Ratio Lasso Regression for Compositional Data
-Version: 0.2.0
+Version: 0.2.0.9000
Date: 2023-07-04
Authors@R: c(
person("Teng", "Fei", , "feit1@mskcc.org", role = c("aut", "cre", "cph"),
diff --git a/NEWS.md b/NEWS.md
index a3c94e5..09160bb 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,11 @@
+# FLORAL (development version)
+
+* Improves stability when fitting the Fine-Gray model with longitudinal covariates.
+
+* Enables parallel computation for cross-validation via the new `ncore` argument.
+
+* Fixes several bugs reported in GitHub Issues.
+
# FLORAL 0.2.0
* Including more examples in document compared to CRAN version (0.1.0.9000)
diff --git a/R/FLORAL.R b/R/FLORAL.R
index f98655b..6498dd1 100644
--- a/R/FLORAL.R
+++ b/R/FLORAL.R
@@ -15,6 +15,7 @@
#' @param a A scalar between 0 and 1: \code{a} is the weight for lasso penalty while \code{1-a} is the weight for ridge penalty.
#' @param mu Value of penalty for the augmented Lagrangian
#' @param ncv Folds of cross-validation. Use \code{NULL} if cross-validation is not wanted.
+#' @param ncore Number of cores used for parallel computation during cross-validation. Default is 1.
#' @param intercept \code{TRUE} or \code{FALSE}, indicating whether an intercept should be estimated.
#' @param foldid A vector of fold indicator. Default is \code{NULL}.
#' @param step2 \code{TRUE} or \code{FALSE}, indicating whether a second-stage feature selection for specific ratios should be performed for the features selected by the main lasso algorithm. Will only be performed if cross validation is enabled.
@@ -45,7 +46,7 @@
#' # fit <- FLORAL(dat$xcount,survival::Surv(dat$t,dat$d,type="mstate"),failcode=1,
#' # family="finegray",progress=FALSE,step2=FALSE)
#'
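+#' # A minimal sketch of the new parallel cross-validation option; the
+#' # argument values below are illustrative:
+#' # fit <- FLORAL(dat$xcount,dat$y,ncv=5,ncore=2,progress=FALSE)
+#'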
-#' @import Rcpp ggplot2 survival glmnet dplyr
+#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach doRNG parallel
#' @importFrom survcomp concordance.index
#' @importFrom reshape melt
#' @importFrom utils combn
@@ -69,6 +70,7 @@ FLORAL <- function(x,
a=1,
mu=1,
ncv=5,
+ ncore=1,
intercept=FALSE,
foldid=NULL,
step2=TRUE,
@@ -103,7 +105,8 @@ FLORAL <- function(x,
foldid,
step2,
progress,
- plot)
+ plot,
+ ncore=ncore)
}
}else if(family == "binomial"){
@@ -123,7 +126,8 @@ FLORAL <- function(x,
foldid,
step2,
progress,
- plot)
+ plot,
+ ncore=ncore)
}
}else if(family == "cox"){
@@ -165,7 +169,8 @@ FLORAL <- function(x,
foldid,
step2,
progress,
- plot)
+ plot,
+ ncore=ncore)
}
@@ -183,7 +188,8 @@ FLORAL <- function(x,
foldid,
step2,
progress,
- plot)
+ plot,
+ ncore=ncore)
}
}else if (family == "finegray"){
@@ -210,9 +216,10 @@ FLORAL <- function(x,
if (is.null(failcode)){
warning("`failcode` is `NULL`. Using the first failure type as default")
- df_FG <- finegray(Surv(tstart, tstop, dd) ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
+        failcode <- 1
+ df_FG <- finegray(Surv(tstart, tstop, dd, type="mstate") ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
}else{
- df_FG <- finegray(Surv(tstart, tstop, dd) ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
+ df_FG <- finegray(Surv(tstart, tstop, dd, type="mstate") ~ ., id=id, data=xy, etype=failcode,timefix = FALSE)
}
newx <- as.matrix(df_FG[,colnames(x)])
@@ -234,7 +241,8 @@ FLORAL <- function(x,
foldid,
step2,
progress,
- plot)
+ plot,
+ ncore=ncore)
}
@@ -270,7 +278,8 @@ FLORAL <- function(x,
foldid,
step2,
progress,
- plot)
+ plot,
+ ncore=ncore)
}
@@ -378,7 +387,7 @@ FLORAL <- function(x,
#' dat <- simu(n=50,p=30,model="linear")
#' fit <- mcv.FLORAL(mcv=2,ncore=1,x=dat$xcount,y=dat$y,ncv=2,progress=FALSE,step2=TRUE,plot=FALSE)
#'
-#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach doRNG parallel
+#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach parallel
#' @importFrom survcomp concordance.index
#' @importFrom reshape melt
#' @importFrom utils combn
@@ -444,6 +453,7 @@ mcv.FLORAL <- function(mcv=10,
a,
mu,
ncv,
+ ncore=1,
intercept,
foldid=NULL,
step2,
@@ -499,6 +509,7 @@ mcv.FLORAL <- function(mcv=10,
a,
mu,
ncv,
+ ncore=1,
intercept,
foldid=NULL,
step2,
@@ -715,6 +726,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1),
a = a[i],
mu,
ncv,
+ ncore=1,
intercept,
foldid=foldid,
step2,
@@ -769,6 +781,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1),
a = a[1],
mu,
ncv,
+ ncore=1,
intercept,
foldid=NULL,
step2,
@@ -796,6 +809,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1),
a = a[i],
mu,
ncv,
+ ncore=1,
intercept,
foldid=fit0$foldid,
step2,
diff --git a/R/LogRatioCoxLasso.R b/R/LogRatioCoxLasso.R
index f4dc05b..e735cf6 100644
--- a/R/LogRatioCoxLasso.R
+++ b/R/LogRatioCoxLasso.R
@@ -13,7 +13,8 @@ LogRatioCoxLasso <- function(x,
plot=TRUE,
mcv="Deviance",
loop1=FALSE,
- loop2=FALSE){
+ loop2=FALSE,
+ ncore=1){
ptm <- proc.time()
@@ -66,7 +67,7 @@ LogRatioCoxLasso <- function(x,
if (!is.null(ncv)){
cvdev <- matrix(0,nrow=length(lidx),ncol=ncv)
- cvdevnull <- rep(0,ncol=ncv)
+ # cvdevnull <- rep(0,ncol=ncv)
if (is.null(foldid)){
labels <- coxsplity(as.matrix(y),ncv)
@@ -76,66 +77,112 @@ LogRatioCoxLasso <- function(x,
# labels <- caret::createFolds(factor(d),k=ncv)
- for (cv in 1:ncv){
+ if (ncore == 1){
- if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
-
- # train.x <- x[-labels[[cv]],]
- # train.d <- d[-labels[[cv]]]
- # train.t <- t[-labels[[cv]]]
- # test.x <- x[labels[[cv]],]
- # test.d <- d[labels[[cv]]]
- # test.t <- t[labels[[cv]]]
-
- train.x <- x[labels!=cv,]
- train.d <- d[labels!=cv]
- train.t <- t[labels!=cv]
- test.x <- x[labels==cv,]
- test.d <- d[labels==cv]
- test.t <- t[labels==cv]
-
- cv.devnull <- 0
- train.tj <- sort(train.t[train.d==1])
- for (j in 1:length(train.tj)){
- cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j]))
+ for (cv in 1:ncv){
+
+ if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+
+ # train.x <- x[-labels[[cv]],]
+ # train.d <- d[-labels[[cv]]]
+ # train.t <- t[-labels[[cv]]]
+ # test.x <- x[labels[[cv]],]
+ # test.d <- d[labels[[cv]]]
+ # test.t <- t[labels[[cv]]]
+
+ train.x <- x[labels!=cv,]
+ train.d <- d[labels!=cv]
+ train.t <- t[labels!=cv]
+ test.x <- x[labels==cv,]
+ test.d <- d[labels==cv]
+ test.t <- t[labels==cv]
+
+ cv.devnull <- 0
+ train.tj <- sort(train.t[train.d==1])
+ for (j in 1:length(train.tj)){
+ cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j]))
+ }
+ cv.devnull <- 2*cv.devnull
+
+ cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress,loop1,loop2,notcv=FALSE)
+
+ cv.devnull <- 0
+ loglik <- rep(0,length(lidx))
+ linprod <- test.x %*% cvfit$beta
+
+ if (mcv == "Deviance"){
+
+ test.tj <- sort(test.t[test.d==1])
+ for (j in 1:length(test.tj)){
+ cv.devnull <- cv.devnull + log(sum(test.t >= test.tj[j]))
+ if (sum(test.t >= test.tj[j]) > 1){
+ cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
+ log(colSums(exp(linprod[test.t >= test.tj[j],])) + 1e-8)
+ }else if (sum(test.t >= test.tj[j]) == 1){
+ cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
+ log(exp(linprod[test.t >= test.tj[j],]) + 1e-8)
+ }
+ #- log(accu(link(widx))+1e-8)
+ }
+
+ # cvdevnull[cv] <- 2*cv.devnull
+ cvdev[,cv] <- -2*cvdev[,cv]
+
+ }else if (mcv == "Cindex"){
+
+ for (kk in 1:length(lidx)){
+ cvdev[kk,cv] <- 1-concordance.index(x=linprod[,kk],surv.time=test.t,surv.event=test.d)$c.index
+ }
+
+ }
+ # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
+
}
- cv.devnull <- 2*cv.devnull
- cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress,loop1,loop2,notcv=FALSE)
+ }else if (ncore > 1){
- cv.devnull <- 0
- loglik <- rep(0,length(lidx))
- linprod <- test.x %*% cvfit$beta
+      if (progress) warning(paste0("Using ", ncore, " cores for cross-validation computation."))
- if (mcv == "Deviance"){
+ cl <- makeCluster(ncore)
+ registerDoParallel(cl)
+
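+      # Parallel analogue of the single-core loop above: each worker refits the
+      # penalized Cox path on its training folds and returns the test-fold
+      # deviance across the lambda grid; .combine=cbind assembles the
+      # lambda-by-fold matrix.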
+ cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% {
+
+ train.x <- x[labels!=cv,]
+ train.d <- d[labels!=cv]
+ train.t <- t[labels!=cv]
+ test.x <- x[labels==cv,]
+ test.d <- d[labels==cv]
+ test.t <- t[labels==cv]
+
+ cv.devnull <- 0
+ train.tj <- sort(train.t[train.d==1])
+ for (j in 1:length(train.tj)){
+ cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j]))
+ }
+ cv.devnull <- 2*cv.devnull
+
+ cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE,loop1,loop2,notcv=FALSE)
+
+ linprod <- test.x %*% cvfit$beta
+ cv.dev <- rep(0,length(lidx))
test.tj <- sort(test.t[test.d==1])
for (j in 1:length(test.tj)){
- cv.devnull <- cv.devnull + log(sum(test.t >= test.tj[j]))
if (sum(test.t >= test.tj[j]) > 1){
- cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
+ cv.dev <- cv.dev + linprod[test.t == test.tj[j],] -
log(colSums(exp(linprod[test.t >= test.tj[j],])) + 1e-8)
}else if (sum(test.t >= test.tj[j]) == 1){
- cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] -
+ cv.dev <- cv.dev + linprod[test.t == test.tj[j],] -
log(exp(linprod[test.t >= test.tj[j],]) + 1e-8)
}
- #- log(accu(link(widx))+1e-8)
- }
-
- cvdevnull[cv] <- 2*cv.devnull
- cvdev[,cv] <- -2*cvdev[,cv]
-
- }else if (mcv == "Cindex"){
-
- for (kk in 1:length(lidx)){
- cvdev[kk,cv] <- 1-concordance.index(x=linprod[,kk],surv.time=test.t,surv.event=test.d)$c.index
}
+ cv.dev <- -2*cv.dev
+ cv.dev
}
-
-
- # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
+ stopCluster(cl)
}
@@ -230,6 +277,8 @@ LogRatioCoxLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t,d),type.measure = "deviance",family="cox")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(t=t,d=d,x=x.select.min)
@@ -259,6 +308,8 @@ LogRatioCoxLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t,d),type.measure = "deviance",family="cox")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(t=t,d=d,x=x.select.min)
diff --git a/R/LogRatioFGLasso.R b/R/LogRatioFGLasso.R
index 973f243..9ace4ea 100644
--- a/R/LogRatioFGLasso.R
+++ b/R/LogRatioFGLasso.R
@@ -13,7 +13,8 @@ LogRatioFGLasso <- function(x,
step2=FALSE,
progress=TRUE,
plot=TRUE,
- mcv="Deviance"){
+ mcv="Deviance",
+ ncore=1){
ptm <- proc.time()
@@ -78,70 +79,127 @@ LogRatioFGLasso <- function(x,
if (is.null(foldid)){
labels <- coxsplitss(as.matrix(y),id,ncv)
}else{
- labels <- foldid
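+        # Expand the subject-level foldid to the rows of the Fine-Gray long
+        # format so that all rows from one subject stay in the same fold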
+ labels <- fgfoldid(id,foldid)
}
# labels <- caret::createFolds(factor(d),k=ncv)
- for (cv in 1:ncv){
+ if (ncore == 1){
- if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
-
- # train.x <- x[-labels[[cv]],]
- # train.d <- d[-labels[[cv]]]
- # train.t <- t[-labels[[cv]]]
- # test.x <- x[labels[[cv]],]
- # test.d <- d[labels[[cv]]]
- # test.t <- t[labels[[cv]]]
-
- train.x <- x[labels!=cv,]
- train.d <- d[labels!=cv]
- train.t0 <- t0[labels!=cv]
- train.t1 <- t1[labels!=cv]
- train.tt <- y[labels!=cv,1:2]
- train.w <- weight[labels!=cv]
-
- test.x <- x[labels==cv,]
- test.d <- d[labels==cv]
- test.t0 <- t0[labels==cv]
- test.t1 <- t1[labels==cv]
- test.tt <- y[labels==cv,1:2]
- test.w <- weight[labels==cv]
-
- cv.devnull <- 0
- train.tj <- sort(train.t1[train.d==1])
- for (j in 1:length(train.tj)){
- cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1]))
+ for (cv in 1:ncv){
+
+ if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+
+ # train.x <- x[-labels[[cv]],]
+ # train.d <- d[-labels[[cv]]]
+ # train.t <- t[-labels[[cv]]]
+ # test.x <- x[labels[[cv]],]
+ # test.d <- d[labels[[cv]]]
+ # test.t <- t[labels[[cv]]]
+
+ train.x <- x[labels!=cv,]
+ train.d <- d[labels!=cv]
+ train.t0 <- t0[labels!=cv]
+ train.t1 <- t1[labels!=cv]
+ train.tt <- y[labels!=cv,1:2]
+ train.w <- weight[labels!=cv]
+
+ test.x <- x[labels==cv,]
+ test.d <- d[labels==cv]
+ test.t0 <- t0[labels==cv]
+ test.t1 <- t1[labels==cv]
+ test.tt <- y[labels==cv,1:2]
+ test.w <- weight[labels==cv]
+
+ cv.devnull <- 0
+ train.tj <- sort(train.t1[train.d==1])
+ for (j in 1:length(train.tj)){
+          cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[j] & train.t0 < train.tj[j]))
+ }
+ cv.devnull <- 2*cv.devnull
+
+ cvfit <- fg_enet_al(train.x,train.t0,train.t1,train.d,train.tj,train.w,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress)
+
+ cv.devnull <- 0
+ loglik <- rep(0,length(lidx))
+ linprod <- test.w * (test.x %*% cvfit$beta)
+
+ if (mcv == "Deviance"){
+
+ test.tj <- sort(test.t1[test.d==1])
+ for (j in 1:length(test.tj)){
+ cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]))
+ if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){
+ cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8)
+ }else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){
+ cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8)
+ }
+ #- log(accu(link(widx))+1e-8)
+ }
+
+ cvdevnull[cv] <- 2*cv.devnull
+ cvdev[,cv] <- -2*cvdev[,cv]
+
+ }
+
+ # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
+
}
- cv.devnull <- 2*cv.devnull
- cvfit <- fg_enet_al(train.x,train.t0,train.t1,train.d,train.tj,train.w,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress)
+  }else if (ncore > 1){
+
+      if (progress) warning(paste0("Using ", ncore, " cores for cross-validation computation."))
- cv.devnull <- 0
- loglik <- rep(0,length(lidx))
- linprod <- test.w * (test.x %*% cvfit$beta)
+ cl <- makeCluster(ncore)
+ registerDoParallel(cl)
- if (mcv == "Deviance"){
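+      # Parallel analogue of the single-core loop above: each worker refits the
+      # weighted Fine-Gray path on its training folds and returns the
+      # test-fold deviance across the lambda grid.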
+ cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% {
+
+ train.x <- x[labels!=cv,]
+ train.d <- d[labels!=cv]
+ train.t0 <- t0[labels!=cv]
+ train.t1 <- t1[labels!=cv]
+ train.tt <- y[labels!=cv,1:2]
+ train.w <- weight[labels!=cv]
+
+ test.x <- x[labels==cv,]
+ test.d <- d[labels==cv]
+ test.t0 <- t0[labels==cv]
+ test.t1 <- t1[labels==cv]
+ test.tt <- y[labels==cv,1:2]
+ test.w <- weight[labels==cv]
+
+ cv.devnull <- 0
+ train.tj <- sort(train.t1[train.d==1])
+ for (j in 1:length(train.tj)){
+        cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[j] & train.t0 < train.tj[j]))
+ }
+ cv.devnull <- 2*cv.devnull
+
+ cvfit <- fg_enet_al(train.x,train.t0,train.t1,train.d,train.tj,train.w,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE)
+
+ cv.dev <- rep(0,length(lidx))
+ linprod <- test.w * (test.x %*% cvfit$beta)
test.tj <- sort(test.t1[test.d==1])
for (j in 1:length(test.tj)){
- cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]))
if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){
- cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] -
log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8)
}else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){
- cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] -
log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8)
}
- #- log(accu(link(widx))+1e-8)
}
- cvdevnull[cv] <- 2*cv.devnull
- cvdev[,cv] <- -2*cvdev[,cv]
+ cv.dev <- -2*cv.dev
+ cv.dev
}
- # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
+ stopCluster(cl)
}
@@ -238,6 +296,8 @@ LogRatioFGLasso <- function(x,
stepglmnet <- suppressWarnings(cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),weights=weight,type.measure = "deviance",family="cox"))
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min)
@@ -272,6 +332,8 @@ LogRatioFGLasso <- function(x,
stepglmnet <- suppressWarnings(cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),weights=weight,type.measure = "deviance",family="cox"))
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min)
diff --git a/R/LogRatioLasso.R b/R/LogRatioLasso.R
index 269c065..0841888 100644
--- a/R/LogRatioLasso.R
+++ b/R/LogRatioLasso.R
@@ -11,7 +11,8 @@ LogRatioLasso <- function(x,
foldid=NULL,
step2=FALSE,
progress=TRUE,
- plot=TRUE){
+ plot=TRUE,
+ ncore=1){
ptm <- proc.time()
@@ -58,22 +59,47 @@ LogRatioLasso <- function(x,
labels <- foldid
}
- for (cv in 1:ncv){
+ if (ncore == 1){
- if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+ for (cv in 1:ncv){
+
+ if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+
+ train.x <- x[labels!=cv,]
+ train.y <- y[labels!=cv]
+ test.x <- x[labels==cv,]
+ test.y <- y[labels==cv]
+
+ cvfit <- linear_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress)
+
+ cvmse[,cv] <- apply(test.x %*% cvfit$beta,2,function(x) sum((test.y-x)^2)/length(test.y))
+
+ }
+
+  }else if (ncore > 1){
+
+    if (progress) warning(paste0("Using ", ncore, " cores for cross-validation computation."))
- train.x <- x[labels!=cv,]
- train.y <- y[labels!=cv]
- test.x <- x[labels==cv,]
- test.y <- y[labels==cv]
+ cl <- makeCluster(ncore)
+ registerDoParallel(cl)
- cvfit <- linear_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress)
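+    # Parallel analogue of the single-core loop above: each worker refits the
+    # linear lasso path on its training folds and returns the test-fold
+    # mean squared error across the lambda grid.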
+ cvmse <- foreach(cv=1:ncv,.combine=cbind) %dopar% {
+
+ train.x <- x[labels!=cv,]
+ train.y <- y[labels!=cv]
+ test.x <- x[labels==cv,]
+ test.y <- y[labels==cv]
+
+ cvfit <- linear_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,FALSE)
+
+ apply(test.x %*% cvfit$beta,2,function(x) sum((test.y-x)^2)/length(test.y))
+
+ }
- cvmse[,cv] <- apply(test.x %*% cvfit$beta,2,function(x) sum((test.y-x)^2)/length(test.y))
+ stopCluster(cl)
}
-
mean.cvmse <- rowMeans(cvmse)
se.cvmse <- apply(cvmse,1,function(x) sd(x)/sqrt(ncv))
@@ -164,6 +190,8 @@ LogRatioLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="gaussian")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(y=y,x=x.select.min)
@@ -201,6 +229,8 @@ LogRatioLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="gaussian")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(y=y,x=x.select.min)
step2fit <- suppressWarnings(step(glm(y~.,data=df_step2,family=gaussian),trace=0))
diff --git a/R/LogRatioLogisticLasso.R b/R/LogRatioLogisticLasso.R
index cb62131..97c01f8 100644
--- a/R/LogRatioLogisticLasso.R
+++ b/R/LogRatioLogisticLasso.R
@@ -12,7 +12,8 @@ LogRatioLogisticLasso <- function(x,
progress=TRUE,
plot=TRUE,
loop1=FALSE,
- loop2=FALSE){
+ loop2=FALSE,
+ ncore=1){
ptm <- proc.time()
@@ -54,18 +55,44 @@ LogRatioLogisticLasso <- function(x,
labels <- foldid
}
- for (cv in 1:ncv){
+ if (ncore == 1){
- if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+ for (cv in 1:ncv){
+
+ if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+
+ train.x <- x[labels!=cv,]
+ train.y <- y[labels!=cv]
+ test.x <- x[labels==cv,]
+ test.y <- y[labels==cv]
+
+ cvfit <- logistic_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress,loop1,loop2)
+
+ cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y- exp(x)/(1+exp(x)))^2)/length(test.y))
+
+ }
+
+ }else if (ncore > 1){
- train.x <- x[labels!=cv,]
- train.y <- y[labels!=cv]
- test.x <- x[labels==cv,]
- test.y <- y[labels==cv]
+    if (progress) warning(paste0("Using ", ncore, " cores for cross-validation computation."))
- cvfit <- logistic_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress,loop1,loop2)
+ cl <- makeCluster(ncore)
+ registerDoParallel(cl)
+
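+    # Parallel analogue of the single-core loop above: each worker refits the
+    # logistic lasso path on its training folds and returns the test-fold
+    # squared error of the predicted probabilities across the lambda grid.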
+ cvmse <- foreach(cv=1:ncv,.combine=cbind) %dopar% {
+
+ train.x <- x[labels!=cv,]
+ train.y <- y[labels!=cv]
+ test.x <- x[labels==cv,]
+ test.y <- y[labels==cv]
+
+ cvfit <- logistic_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,FALSE,loop1,loop2)
+
+ apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y- exp(x)/(1+exp(x)))^2)/length(test.y))
+
+ }
- cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y- exp(x)/(1+exp(x)))^2)/length(test.y))
+ stopCluster(cl)
}
@@ -164,16 +191,14 @@ LogRatioLogisticLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="binomial")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(y=y,x=x.select.min)
step2fit <- suppressWarnings(step(glm(y~.,data=df_step2,family=binomial),trace=0))
vars <- as.numeric(sapply(names(step2fit$coefficients),function(x) strsplit(x,split = "[.]")[[1]][2]))
- if (ncol(idxs) == 1 & length(vars) == 2){
- vars = 1
- }
-
if (is.null(ncol(idxs))){
if (length(vars) == 2){
selected <- idxs
@@ -196,7 +221,6 @@ LogRatioLogisticLasso <- function(x,
if (length(which(ret$best.beta$add.1se!=0)) > 1){
idxs <- combn(which(ret$best.beta$add.1se!=0),2)
-
x.select.min <- matrix(NA,nrow=n,ncol=ncol(idxs))
for (k in 1:ncol(idxs)){
x.select.min[,k] <- x[,idxs[1,k]] - x[,idxs[2,k]]
@@ -206,6 +230,8 @@ LogRatioLogisticLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="binomial")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(y=y,x=x.select.min)
diff --git a/R/LogRatioTDCoxLasso.R b/R/LogRatioTDCoxLasso.R
index 79cacdf..7d24508 100644
--- a/R/LogRatioTDCoxLasso.R
+++ b/R/LogRatioTDCoxLasso.R
@@ -12,7 +12,8 @@ LogRatioTDCoxLasso <- function(x,
step2=FALSE,
progress=TRUE,
plot=TRUE,
- mcv="Deviance"){
+ mcv="Deviance",
+ ncore=1){
ptm <- proc.time()
@@ -42,7 +43,7 @@ LogRatioTDCoxLasso <- function(x,
sfun = d - expect
# hfun <- expect - sapply(t,function(x) sum(1/denomj[which(tj <= x)]^2)) + 1e-8 # hessian
# z <- sfun/hfun
-
+
if (a > 0){
lambda0 <- max(abs(t(sfun) %*% x))/(a*n)
}else if (a == 0){
@@ -82,66 +83,122 @@ LogRatioTDCoxLasso <- function(x,
# labels <- caret::createFolds(factor(d),k=ncv)
- for (cv in 1:ncv){
-
- if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
-
- # train.x <- x[-labels[[cv]],]
- # train.d <- d[-labels[[cv]]]
- # train.t <- t[-labels[[cv]]]
- # test.x <- x[labels[[cv]],]
- # test.d <- d[labels[[cv]]]
- # test.t <- t[labels[[cv]]]
-
- train.x <- x[labels!=cv,]
- train.d <- d[labels!=cv]
- train.t0 <- t0[labels!=cv]
- train.t1 <- t1[labels!=cv]
- train.tt <- y[labels!=cv,1:2]
-
- test.x <- x[labels==cv,]
- test.d <- d[labels==cv]
- test.t0 <- t0[labels==cv]
- test.t1 <- t1[labels==cv]
- test.tt <- y[labels==cv,1:2]
+ if (ncore == 1){
- cv.devnull <- 0
- train.tj <- sort(train.t1[train.d==1])
- for (j in 1:length(train.tj)){
- cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1]))
+ for (cv in 1:ncv){
+
+ if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n"))
+
+ # train.x <- x[-labels[[cv]],]
+ # train.d <- d[-labels[[cv]]]
+ # train.t <- t[-labels[[cv]]]
+ # test.x <- x[labels[[cv]],]
+ # test.d <- d[labels[[cv]]]
+ # test.t <- t[labels[[cv]]]
+
+ train.x <- x[labels!=cv,]
+ train.d <- d[labels!=cv]
+ train.t0 <- t0[labels!=cv]
+ train.t1 <- t1[labels!=cv]
+ train.tt <- y[labels!=cv,1:2]
+
+ test.x <- x[labels==cv,]
+ test.d <- d[labels==cv]
+ test.t0 <- t0[labels==cv]
+ test.t1 <- t1[labels==cv]
+ test.tt <- y[labels==cv,1:2]
+
+ cv.devnull <- 0
+ train.tj <- sort(train.t1[train.d==1])
+ for (j in 1:length(train.tj)){
+          cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[j] & train.t0 < train.tj[j]))
+ }
+ cv.devnull <- 2*cv.devnull
+
+ cvfit <- cox_timedep_enet_al(train.x,train.t0,train.t1,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress)
+
+ cv.devnull <- 0
+ loglik <- rep(0,length(lidx))
+ linprod <- test.x %*% cvfit$beta
+
+ if (mcv == "Deviance"){
+
+ test.tj <- sort(test.t1[test.d==1])
+ for (j in 1:length(test.tj)){
+ cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]))
+ if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){
+ cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8)
+ }else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){
+ cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8)
+ }
+ #- log(accu(link(widx))+1e-8)
+ }
+
+ cvdevnull[cv] <- 2*cv.devnull
+ cvdev[,cv] <- -2*cvdev[,cv]
+
+ }
+
+ # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
+
}
- cv.devnull <- 2*cv.devnull
- cvfit <- cox_timedep_enet_al(train.x,train.t0,train.t1,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress)
+ }else if (ncore > 1){
- cv.devnull <- 0
- loglik <- rep(0,length(lidx))
- linprod <- test.x %*% cvfit$beta
+      if (progress) warning(paste0("Using ", ncore, " cores for cross-validation computation."))
- if (mcv == "Deviance"){
+ cl <- makeCluster(ncore)
+ registerDoParallel(cl)
+
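+      # Parallel analogue of the single-core loop above: each worker refits the
+      # time-dependent Cox path on its training folds and returns the
+      # test-fold deviance across the lambda grid.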
+ cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% {
+
+ train.x <- x[labels!=cv,]
+ train.d <- d[labels!=cv]
+ train.t0 <- t0[labels!=cv]
+ train.t1 <- t1[labels!=cv]
+ train.tt <- y[labels!=cv,1:2]
+
+ test.x <- x[labels==cv,]
+ test.d <- d[labels==cv]
+ test.t0 <- t0[labels==cv]
+ test.t1 <- t1[labels==cv]
+ test.tt <- y[labels==cv,1:2]
+
+ cv.devnull <- 0
+ train.tj <- sort(train.t1[train.d==1])
+ for (j in 1:length(train.tj)){
+        cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[j] & train.t0 < train.tj[j]))
+ }
+ cv.devnull <- 2*cv.devnull
+
+ cvfit <- cox_timedep_enet_al(train.x,train.t0,train.t1,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE)
+
+ cv.dev <- rep(0,length(lidx))
+ linprod <- test.x %*% cvfit$beta
test.tj <- sort(test.t1[test.d==1])
for (j in 1:length(test.tj)){
- cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]))
if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){
- cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] -
log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8)
}else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){
- cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] -
+ cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] -
log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8)
}
- #- log(accu(link(widx))+1e-8)
}
- cvdevnull[cv] <- 2*cv.devnull
- cvdev[,cv] <- -2*cvdev[,cv]
-
+ cv.dev <- -2*cv.dev
+ cv.dev
}
- # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y))
+ stopCluster(cl)
}
+
+
mean.cvdev <- rowMeans(cvdev)
se.cvdev <- apply(cvdev,1,function(x) sd(x)/sqrt(ncv))
@@ -233,6 +290,8 @@ LogRatioTDCoxLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),type.measure = "deviance",family="cox")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min)
@@ -267,6 +326,8 @@ LogRatioTDCoxLasso <- function(x,
stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),type.measure = "deviance",family="cox")
x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)]
+ }else{
+ idxs <- as.vector(idxs)
}
df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min)
diff --git a/R/coxsplit.R b/R/coxsplit.R
index 7d6bf9f..bf9ea21 100644
--- a/R/coxsplit.R
+++ b/R/coxsplit.R
@@ -28,5 +28,14 @@ coxsplitss=function(y, id, nfolds){
foldid <- full$foldid
+ return(foldid)
+}
+
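+# Expand subject-level fold assignments to the row level of the Fine-Gray
+# long-format data, so that all rows from the same subject share one fold.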
+fgfoldid=function(id, foldid){
+ idfoldid <- data.frame(id=unique(id),foldid=foldid)
+ yfoldid <- data.frame(id=id)
+ mergedid <- yfoldid %>% left_join(idfoldid,by="id")
+
+ foldid <- mergedid$foldid
return(foldid)
}
\ No newline at end of file
diff --git a/R/simu.R b/R/simu.R
index e4172b7..f735798 100644
--- a/R/simu.R
+++ b/R/simu.R
@@ -172,7 +172,7 @@ simu <- function(n = 100,
p.cif = 0.66
lambda <- exp(eta)
cl=0.19
- cu=1.09
+ cu=10
P1 <- 1-(1-p.cif)^(lambda)
epsilon <- rep(0,n)
@@ -195,10 +195,9 @@ simu <- function(n = 100,
#generate censoring time
c <- runif(n,cl,cu)
#observed time
- t <- t0*I(t0<=c) + c*I(t0>c)
-
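+  # Subjects with an infinite latent event time never fail from any cause;
+  # treat them as censored at the censoring time c.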
+  t <- ifelse(t0 == Inf, c, t0*I(t0<=c) + c*I(t0>c))
# outcome
- d <- 0*I(t == c) + epsilon*I(t < c)
+ d <- ifelse(t0 == Inf, 0, 0*I(t == c) + epsilon*I(t < c))
ret <- list(xcount=xcount,x=xobs,t=t,d=d,beta=c(beta,rep(0,p-weak-strong)),idx=true_set)
diff --git a/README.Rmd b/README.Rmd
index 200a0ec..6ce2598 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -24,11 +24,13 @@ knitr::opts_chunk$set(
The `FLORAL` package is an open-source computational tool to perform log-ratio lasso regression modeling and compositional feature selection for continuous, binary, time-to-event, and competing risk outcomes. The proposed method adapts the augmented Lagrangian algorithm for a zero-sum constraint optimization problem while enabling a two-stage screening process for extended false-positive control.
-The associated preprint for `FLORAL` is available on [bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599v1).
+The associated preprint for `FLORAL` is available on [bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599).
-## Installation
+## System requirements and installation
-You can install `FLORAL` with the following code.
+The current version of `FLORAL` (0.2.0) was built under R version 4.1.1. R package dependencies can be found in the `DESCRIPTION` file.
+
+You can install `FLORAL` with the following code. The installation is typically complete within minutes.
``` r
install.packages("FLORAL")
@@ -43,9 +45,9 @@ devtools::install_github("vdblab/FLORAL")
## Example
-Here is a toy example for linear regression with 10-fold cross-validation for a simulated data with 50 samples and 100 compositional features. Option `progress=TRUE` can be used to show the progress bar of the running algorithm.
+Here is a toy example of linear regression with 10-fold cross-validation on a simulated dataset with 50 samples and 100 compositional features. Option `progress=TRUE` can be used to show the progress bar of the running algorithm.
-The data simulation procedure is described in the preprint.
+The data simulation procedure is described in the preprint. The expected run time for the following demo is about a minute.
```{r example}
set.seed(23420)
@@ -99,10 +101,14 @@ dat.fg <- simu(n=50,p=100,model="finegray")
fit.fg <- FLORAL(dat.cox$xcount,survival::Surv(dat.cox$t,dat.cox$d,type="mstate"),family="finegray",ncv=10,progress=FALSE,step2=FALSE)
```
+## Repository for Reproducibility
+
+Reproducible code for the analyses reported in the manuscript can be found at [this repository](https://github.com/vdblab/FLORAL-analysis).
+
## Contributing
The `FLORAL` package is jointly managed by [MSKCC Biostatistics service](https://www.mskcc.org/departments/epidemiology-biostatistics/biostatistics) and [the Marcel van den Brink Lab](https://vandenbrinklab.org/). Please note that the `FLORAL` project is released with a [Contributor Code of Conduct](https://github.com/vdblab/FLORAL/blob/master/.github/CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. Thank you to all contributors!
## Reference
-Fei T, Funnell T, Waters NR, Raj SS, Devlin SM, Dai A, Miltiadous O, Shouval R, Lv M, Peled JU, Ponce DM, Perales M-A, Gönen M, van den Brink MRM, Scalable Log-ratio Lasso Regression Enhances Microbiome Feature Selection for Predictive Models, bioRxiv 2023.05.02.538599; doi: https://doi.org/10.1101/2023.05.02.538599.
\ No newline at end of file
+Fei T, Funnell T, Waters NR, Raj SS, Sadeghi K, Dai A, Miltiadous O, Shouval R, Lv M, Peled JU, Ponce DM, Perales M-A, Gönen M, van den Brink MRM, Enhanced Feature Selection for Microbiome Data using FLORAL: Scalable Log-ratio Lasso Regression, bioRxiv 2023.05.02.538599; doi: https://doi.org/10.1101/2023.05.02.538599.
\ No newline at end of file
diff --git a/README.md b/README.md
index 88c9057..785512b 100644
--- a/README.md
+++ b/README.md
@@ -20,11 +20,15 @@ constraint optimization problem while enabling a two-stage screening
process for extended false-positive control.
The associated preprint for `FLORAL` is available on
-[bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599v1).
+[bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599).
-## Installation
+## System requirements and installation
-You can install `FLORAL` with the following code.
+The current version of `FLORAL` (0.2.0) was built under R version 4.1.1. R
+package dependencies can be found in the `DESCRIPTION` file.
+
+You can install `FLORAL` with the following code. The installation is
+typically complete within minutes.
``` r
install.packages("FLORAL")
@@ -45,7 +49,8 @@ cross-validation for a simulated data with 50 samples and 100
compositional features. Option `progress=TRUE` can be used to show the
progress bar of the running algorithm.
-The data simulation procedure is described in the preprint.
+The data simulation procedure is described in the preprint. The expected
+run time for the following demo is about a minute.
``` r
set.seed(23420)
@@ -62,10 +67,10 @@ coefficients, use `fit$pmse` or `fit$pcoef`:
To view selected compositional features, use `fit$selected.feature`,
where features are sorted by their names. Features under `min` and `1se`
-correspond to penalty parameter *λ*min and *λ*1se,
-respectively. Features under `min.2stage` and `1se.2stage` are obtained
-after applying 2-stage filtering based on features under `min` and
-`1se`, respectively.
+correspond to penalty parameter $\lambda_{\min}$ and
+$\lambda_{\text{1se}}$, respectively. Features under `min.2stage` and
+`1se.2stage` are obtained after applying 2-stage filtering based on
+features under `min` and `1se`, respectively.
We recommend interpreting the selected compositional features as
potential predictive markers to the outcome in the regression model in
@@ -110,14 +115,14 @@ fit$step2.ratios
#> [6] "taxa5/taxa8" "taxa6/taxa7" "taxa6/taxa9" "taxa7/taxa10" "taxa9/taxa32"
#>
#> $min.idx
-#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
-#> [1,] 1 1 1 2 3 3 5 6 7 7 8 9 9
-#> [2,] 13 20 84 5 8 92 8 9 10 79 60 32 92
+#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
+#> [1,] NA 1 1 1 2 3 3 5 6 7 7 8 9 9
+#> [2,] NA 13 20 84 5 8 92 8 9 10 79 60 32 92
#>
#> $`1se.idx`
-#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
-#> [1,] 1 1 1 2 3 5 6 6 7 9
-#> [2,] 13 20 84 5 8 8 7 9 10 32
+#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
+#> [1,] NA 1 1 1 2 3 5 6 6 7 9
+#> [2,] NA 13 20 84 5 8 8 7 9 10 32
```
More detailed interpretations can be obtained for the selected
@@ -153,6 +158,12 @@ dat.fg <- simu(n=50,p=100,model="finegray")
fit.fg <- FLORAL(dat.cox$xcount,survival::Surv(dat.cox$t,dat.cox$d,type="mstate"),family="finegray",ncv=10,progress=FALSE,step2=FALSE)
```
+## Repository for Reproducibility
+
+Reproducible code for the analyses reported in the manuscript
+can be found at [this
+repository](https://github.com/vdblab/FLORAL-analysis).
+
## Contributing
The `FLORAL` package is jointly managed by [MSKCC Biostatistics
@@ -165,8 +176,8 @@ you to all contributors!
## Reference
-Fei T, Funnell T, Waters NR, Raj SS, Devlin SM, Dai A, Miltiadous O,
+Fei T, Funnell T, Waters NR, Raj SS, Sadeghi K, Dai A, Miltiadous O,
Shouval R, Lv M, Peled JU, Ponce DM, Perales M-A, Gönen M, van den Brink
-MRM, Scalable Log-ratio Lasso Regression Enhances Microbiome Feature
-Selection for Predictive Models, bioRxiv 2023.05.02.538599; doi:
+MRM, Enhanced Feature Selection for Microbiome Data using FLORAL:
+Scalable Log-ratio Lasso Regression, bioRxiv 2023.05.02.538599; doi:
.
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 11274b8..5b0f824 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -4,7 +4,6 @@ CRC
Compositional
DM
Dai
-Devlin
Gönen
Interpretable
JU
@@ -17,6 +16,8 @@ Miltiadous
Peled
Perales
RAtio
+Reproducibility
+Sadeghi
Scalable
Shouval
al
@@ -35,4 +36,3 @@ ncv
preprint
reproducibility
se
-λ
diff --git a/man/FLORAL.Rd b/man/FLORAL.Rd
index 26cdd14..156520e 100644
--- a/man/FLORAL.Rd
+++ b/man/FLORAL.Rd
@@ -19,6 +19,7 @@ FLORAL(
a = 1,
mu = 1,
ncv = 5,
+ ncore = 1,
intercept = FALSE,
foldid = NULL,
step2 = TRUE,
@@ -55,6 +56,8 @@ FLORAL(
\item{ncv}{Folds of cross-validation. Use \code{NULL} if cross-validation is not wanted.}
+\item{ncore}{Number of cores used for parallel computation during cross-validation. Default is 1.}
+
\item{intercept}{\code{TRUE} or \code{FALSE}, indicating whether an intercept should be estimated.}
\item{foldid}{A vector of fold indicator. Default is \code{NULL}.}