diff --git a/DESCRIPTION b/DESCRIPTION index d3c61b5..4d197dc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: FLORAL Type: Package Title: Fit Log-Ratio Lasso Regression for Compositional Data -Version: 0.2.0 +Version: 0.2.0.9000 Date: 2023-07-04 Authors@R: c( person("Teng", "Fei", , "feit1@mskcc.org", role = c("aut", "cre", "cph"), diff --git a/NEWS.md b/NEWS.md index a3c94e5..09160bb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +# FLORAL (development version) + +* Improves stability when fitting the Fine-Gray model with longitudinal covariates. + +* Enables parallel computation for cross-validation. + +* Fixes several bugs reported in GitHub Issues. + # FLORAL 0.2.0 * Including more examples in document compared to CRAN version (0.1.0.9000) diff --git a/R/FLORAL.R b/R/FLORAL.R index f98655b..6498dd1 100644 --- a/R/FLORAL.R +++ b/R/FLORAL.R @@ -15,6 +15,7 @@ #' @param a A scalar between 0 and 1: \code{a} is the weight for lasso penalty while \code{1-a} is the weight for ridge penalty. #' @param mu Value of penalty for the augmented Lagrangian #' @param ncv Folds of cross-validation. Use \code{NULL} if cross-validation is not wanted. +#' @param ncore Number of cores used for parallel computation during cross-validation. Default is 1. #' @param intercept \code{TRUE} or \code{FALSE}, indicating whether an intercept should be estimated. #' @param foldid A vector of fold indicator. Default is \code{NULL}. #' @param step2 \code{TRUE} or \code{FALSE}, indicating whether a second-stage feature selection for specific ratios should be performed for the features selected by the main lasso algorithm. Will only be performed if cross validation is enabled. @@ -45,7 +46,7 @@ #' # fit <- FLORAL(dat$xcount,survival::Surv(dat$t,dat$d,type="mstate"),failcode=1, #' # family="finegray",progress=FALSE,step2=FALSE) #' -#' @import Rcpp ggplot2 survival glmnet dplyr +#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach doRNG parallel #' @importFrom survcomp concordance.index #' @importFrom reshape melt #' @importFrom utils combn @@ -69,6 +70,7 @@ FLORAL <- function(x, a=1, mu=1, ncv=5, + ncore=1, intercept=FALSE, foldid=NULL, step2=TRUE, @@ -103,7 +105,8 @@ FLORAL <- function(x, foldid, step2, progress, - plot) + plot, + ncore=ncore) } }else if(family == "binomial"){ @@ -123,7 +126,8 @@ FLORAL <- function(x, foldid, step2, progress, - plot) + plot, + ncore=ncore) } }else if(family == "cox"){ @@ -165,7 +169,8 @@ FLORAL <- function(x, foldid, step2, progress, - plot) + plot, + ncore=ncore) } @@ -183,7 +188,8 @@ FLORAL <- function(x, foldid, step2, progress, - plot) + plot, + ncore=ncore) } }else if (family == "finegray"){ @@ -210,9 +216,10 @@ FLORAL <- function(x, if (is.null(failcode)){ warning("`failcode` is `NULL`. 
Using the first failure type as default") - df_FG <- finegray(Surv(tstart, tstop, dd) ~ ., id=id, data=xy, etype=failcode,timefix = FALSE) + failcode = 1 + df_FG <- finegray(Surv(tstart, tstop, dd, type="mstate") ~ ., id=id, data=xy, etype=failcode,timefix = FALSE) }else{ - df_FG <- finegray(Surv(tstart, tstop, dd) ~ ., id=id, data=xy, etype=failcode,timefix = FALSE) + df_FG <- finegray(Surv(tstart, tstop, dd, type="mstate") ~ ., id=id, data=xy, etype=failcode,timefix = FALSE) } newx <- as.matrix(df_FG[,colnames(x)]) @@ -234,7 +241,8 @@ FLORAL <- function(x, foldid, step2, progress, - plot) + plot, + ncore=ncore) } @@ -270,7 +278,8 @@ FLORAL <- function(x, foldid, step2, progress, - plot) + plot, + ncore=ncore) } @@ -378,7 +387,7 @@ FLORAL <- function(x, #' dat <- simu(n=50,p=30,model="linear") #' fit <- mcv.FLORAL(mcv=2,ncore=1,x=dat$xcount,y=dat$y,ncv=2,progress=FALSE,step2=TRUE,plot=FALSE) #' -#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach doRNG parallel +#' @import Rcpp ggplot2 survival glmnet dplyr doParallel foreach parallel #' @importFrom survcomp concordance.index #' @importFrom reshape melt #' @importFrom utils combn @@ -444,6 +453,7 @@ mcv.FLORAL <- function(mcv=10, a, mu, ncv, + ncore=1, intercept, foldid=NULL, step2, @@ -499,6 +509,7 @@ mcv.FLORAL <- function(mcv=10, a, mu, ncv, + ncore=1, intercept, foldid=NULL, step2, @@ -715,6 +726,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1), a = a[i], mu, ncv, + ncore=1, intercept, foldid=foldid, step2, @@ -769,6 +781,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1), a = a[1], mu, ncv, + ncore=1, intercept, foldid=NULL, step2, @@ -796,6 +809,7 @@ a.FLORAL <- function(a=c(0.1,0.5,1), a = a[i], mu, ncv, + ncore=1, intercept, foldid=fit0$foldid, step2, diff --git a/R/LogRatioCoxLasso.R b/R/LogRatioCoxLasso.R index f4dc05b..e735cf6 100644 --- a/R/LogRatioCoxLasso.R +++ b/R/LogRatioCoxLasso.R @@ -13,7 +13,8 @@ LogRatioCoxLasso <- function(x, plot=TRUE, mcv="Deviance", loop1=FALSE, - loop2=FALSE){ + loop2=FALSE, + ncore=1){ ptm <- proc.time() @@ -66,7 +67,7 @@ LogRatioCoxLasso <- function(x, if (!is.null(ncv)){ cvdev <- matrix(0,nrow=length(lidx),ncol=ncv) - cvdevnull <- rep(0,ncol=ncv) + # cvdevnull <- rep(0,ncol=ncv) if (is.null(foldid)){ labels <- coxsplity(as.matrix(y),ncv) @@ -76,66 +77,112 @@ LogRatioCoxLasso <- function(x, # labels <- caret::createFolds(factor(d),k=ncv) - for (cv in 1:ncv){ + if (ncore == 1){ - if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) - - # train.x <- x[-labels[[cv]],] - # train.d <- d[-labels[[cv]]] - # train.t <- t[-labels[[cv]]] - # test.x <- x[labels[[cv]],] - # test.d <- d[labels[[cv]]] - # test.t <- t[labels[[cv]]] - - train.x <- x[labels!=cv,] - train.d <- d[labels!=cv] - train.t <- t[labels!=cv] - test.x <- x[labels==cv,] - test.d <- d[labels==cv] - test.t <- t[labels==cv] - - cv.devnull <- 0 - train.tj <- sort(train.t[train.d==1]) - for (j in 1:length(train.tj)){ - cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j])) + for (cv in 1:ncv){ + + if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + + # train.x <- x[-labels[[cv]],] + # train.d <- d[-labels[[cv]]] + # train.t <- t[-labels[[cv]]] + # test.x <- x[labels[[cv]],] + # test.d <- d[labels[[cv]]] + # test.t <- t[labels[[cv]]] + + train.x <- x[labels!=cv,] + train.d <- d[labels!=cv] + train.t <- t[labels!=cv] + test.x <- x[labels==cv,] + test.d <- d[labels==cv] + test.t <- t[labels==cv] + + cv.devnull <- 0 + train.tj <- sort(train.t[train.d==1]) + for (j in 
1:length(train.tj)){ + cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j])) + } + cv.devnull <- 2*cv.devnull + + cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress,loop1,loop2,notcv=FALSE) + + cv.devnull <- 0 + loglik <- rep(0,length(lidx)) + linprod <- test.x %*% cvfit$beta + + if (mcv == "Deviance"){ + + test.tj <- sort(test.t[test.d==1]) + for (j in 1:length(test.tj)){ + cv.devnull <- cv.devnull + log(sum(test.t >= test.tj[j])) + if (sum(test.t >= test.tj[j]) > 1){ + cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] - + log(colSums(exp(linprod[test.t >= test.tj[j],])) + 1e-8) + }else if (sum(test.t >= test.tj[j]) == 1){ + cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] - + log(exp(linprod[test.t >= test.tj[j],]) + 1e-8) + } + #- log(accu(link(widx))+1e-8) + } + + # cvdevnull[cv] <- 2*cv.devnull + cvdev[,cv] <- -2*cvdev[,cv] + + }else if (mcv == "Cindex"){ + + for (kk in 1:length(lidx)){ + cvdev[kk,cv] <- 1-concordance.index(x=linprod[,kk],surv.time=test.t,surv.event=test.d)$c.index + } + + } + # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y)) + } - cv.devnull <- 2*cv.devnull - cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress,loop1,loop2,notcv=FALSE) + }else if (ncore > 1){ - cv.devnull <- 0 - loglik <- rep(0,length(lidx)) - linprod <- test.x %*% cvfit$beta + if (progress) warning(paste0("Using ", ncore ," core for cross-validation computation.")) - if (mcv == "Deviance"){ + cl <- makeCluster(ncore) + registerDoParallel(cl) + + cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% { + + train.x <- x[labels!=cv,] + train.d <- d[labels!=cv] + train.t <- t[labels!=cv] + test.x <- x[labels==cv,] + test.d <- d[labels==cv] + test.t <- t[labels==cv] + + cv.devnull <- 0 + train.tj <- sort(train.t[train.d==1]) + for (j in 1:length(train.tj)){ + cv.devnull <- cv.devnull + log(sum(train.t >= train.tj[j])) + } + cv.devnull <- 2*cv.devnull + + cvfit <- cox_enet_al(train.x,train.t,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE,loop1,loop2,notcv=FALSE) + + linprod <- test.x %*% cvfit$beta + cv.dev <- rep(0,length(lidx)) test.tj <- sort(test.t[test.d==1]) for (j in 1:length(test.tj)){ - cv.devnull <- cv.devnull + log(sum(test.t >= test.tj[j])) if (sum(test.t >= test.tj[j]) > 1){ - cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] - + cv.dev <- cv.dev + linprod[test.t == test.tj[j],] - log(colSums(exp(linprod[test.t >= test.tj[j],])) + 1e-8) }else if (sum(test.t >= test.tj[j]) == 1){ - cvdev[,cv] <- cvdev[,cv] + linprod[test.t == test.tj[j],] - + cv.dev <- cv.dev + linprod[test.t == test.tj[j],] - log(exp(linprod[test.t >= test.tj[j],]) + 1e-8) } - #- log(accu(link(widx))+1e-8) - } - - cvdevnull[cv] <- 2*cv.devnull - cvdev[,cv] <- -2*cvdev[,cv] - - }else if (mcv == "Cindex"){ - - for (kk in 1:length(lidx)){ - cvdev[kk,cv] <- 1-concordance.index(x=linprod[,kk],surv.time=test.t,surv.event=test.d)$c.index } + cv.dev <- -2*cv.dev + cv.dev } - - - # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y)) + stopCluster(cl) } @@ -230,6 +277,8 @@ LogRatioCoxLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t,d),type.measure = "deviance",family="cox") x.select.min <- 
x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(t=t,d=d,x=x.select.min) @@ -259,6 +308,8 @@ LogRatioCoxLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t,d),type.measure = "deviance",family="cox") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(t=t,d=d,x=x.select.min) diff --git a/R/LogRatioFGLasso.R b/R/LogRatioFGLasso.R index 973f243..9ace4ea 100644 --- a/R/LogRatioFGLasso.R +++ b/R/LogRatioFGLasso.R @@ -13,7 +13,8 @@ LogRatioFGLasso <- function(x, step2=FALSE, progress=TRUE, plot=TRUE, - mcv="Deviance"){ + mcv="Deviance", + ncore=1){ ptm <- proc.time() @@ -78,70 +79,127 @@ LogRatioFGLasso <- function(x, if (is.null(foldid)){ labels <- coxsplitss(as.matrix(y),id,ncv) }else{ - labels <- foldid + labels <- fgfoldid(id,foldid) } # labels <- caret::createFolds(factor(d),k=ncv) - for (cv in 1:ncv){ + if (ncore == 1){ - if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) - - # train.x <- x[-labels[[cv]],] - # train.d <- d[-labels[[cv]]] - # train.t <- t[-labels[[cv]]] - # test.x <- x[labels[[cv]],] - # test.d <- d[labels[[cv]]] - # test.t <- t[labels[[cv]]] - - train.x <- x[labels!=cv,] - train.d <- d[labels!=cv] - train.t0 <- t0[labels!=cv] - train.t1 <- t1[labels!=cv] - train.tt <- y[labels!=cv,1:2] - train.w <- weight[labels!=cv] - - test.x <- x[labels==cv,] - test.d <- d[labels==cv] - test.t0 <- t0[labels==cv] - test.t1 <- t1[labels==cv] - test.tt <- y[labels==cv,1:2] - test.w <- weight[labels==cv] - - cv.devnull <- 0 - train.tj <- sort(train.t1[train.d==1]) - for (j in 1:length(train.tj)){ - cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1])) + for (cv in 1:ncv){ + + if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + + # train.x <- x[-labels[[cv]],] + # train.d <- d[-labels[[cv]]] + # train.t <- t[-labels[[cv]]] + # test.x <- x[labels[[cv]],] + # test.d <- d[labels[[cv]]] + # test.t <- t[labels[[cv]]] + + train.x <- x[labels!=cv,] + train.d <- d[labels!=cv] + train.t0 <- t0[labels!=cv] + train.t1 <- t1[labels!=cv] + train.tt <- y[labels!=cv,1:2] + train.w <- weight[labels!=cv] + + test.x <- x[labels==cv,] + test.d <- d[labels==cv] + test.t0 <- t0[labels==cv] + test.t1 <- t1[labels==cv] + test.tt <- y[labels==cv,1:2] + test.w <- weight[labels==cv] + + cv.devnull <- 0 + train.tj <- sort(train.t1[train.d==1]) + for (j in 1:length(train.tj)){ + cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1])) + } + cv.devnull <- 2*cv.devnull + + cvfit <- fg_enet_al(train.x,train.t0,train.t1,train.d,train.tj,train.w,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress) + + cv.devnull <- 0 + loglik <- rep(0,length(lidx)) + linprod <- test.w * (test.x %*% cvfit$beta) + + if (mcv == "Deviance"){ + + test.tj <- sort(test.t1[test.d==1]) + for (j in 1:length(test.tj)){ + cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j])) + if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){ + cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8) + }else if (sum(test.t1 >= test.tj[j] & test.t0 < 
test.tj[j]) == 1){ + cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8) + } + #- log(accu(link(widx))+1e-8) + } + + cvdevnull[cv] <- 2*cv.devnull + cvdev[,cv] <- -2*cvdev[,cv] + + } + + # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y)) + } - cv.devnull <- 2*cv.devnull - cvfit <- fg_enet_al(train.x,train.t0,train.t1,train.d,train.tj,train.w,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress) + }else if(ncore > 1){ + + if (progress) warning(paste0("Using ", ncore ," core for cross-validation computation.")) - cv.devnull <- 0 - loglik <- rep(0,length(lidx)) - linprod <- test.w * (test.x %*% cvfit$beta) + cl <- makeCluster(ncore) + registerDoParallel(cl) - if (mcv == "Deviance"){ + cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% { + + train.x <- x[labels!=cv,] + train.d <- d[labels!=cv] + train.t0 <- t0[labels!=cv] + train.t1 <- t1[labels!=cv] + train.tt <- y[labels!=cv,1:2] + train.w <- weight[labels!=cv] + + test.x <- x[labels==cv,] + test.d <- d[labels==cv] + test.t0 <- t0[labels==cv] + test.t1 <- t1[labels==cv] + test.tt <- y[labels==cv,1:2] + test.w <- weight[labels==cv] + + cv.devnull <- 0 + train.tj <- sort(train.t1[train.d==1]) + for (j in 1:length(train.tj)){ + cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1])) + } + cv.devnull <- 2*cv.devnull + + cvfit <- fg_enet_al(train.x,train.t0,train.t1,train.d,train.tj,train.w,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE) + + cv.dev <- rep(0,length(lidx)) + linprod <- test.w * (test.x %*% cvfit$beta) test.tj <- sort(test.t1[test.d==1]) for (j in 1:length(test.tj)){ - cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j])) if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){ - cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] - log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8) }else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){ - cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] - log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8) } - #- log(accu(link(widx))+1e-8) } - cvdevnull[cv] <- 2*cv.devnull - cvdev[,cv] <- -2*cvdev[,cv] + cv.dev <- -2*cv.dev + cv.dev } - # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y)) + stopCluster(cl) } @@ -238,6 +296,8 @@ LogRatioFGLasso <- function(x, stepglmnet <- suppressWarnings(cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),weights=weight,type.measure = "deviance",family="cox")) x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min) @@ -272,6 +332,8 @@ LogRatioFGLasso <- function(x, stepglmnet <- suppressWarnings(cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),weights=weight,type.measure = "deviance",family="cox")) x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- 
data.frame(t0=t0,t1=t1,d=d,x=x.select.min) diff --git a/R/LogRatioLasso.R b/R/LogRatioLasso.R index 269c065..0841888 100644 --- a/R/LogRatioLasso.R +++ b/R/LogRatioLasso.R @@ -11,7 +11,8 @@ LogRatioLasso <- function(x, foldid=NULL, step2=FALSE, progress=TRUE, - plot=TRUE){ + plot=TRUE, + ncore=1){ ptm <- proc.time() @@ -58,22 +59,47 @@ LogRatioLasso <- function(x, labels <- foldid } - for (cv in 1:ncv){ + if (ncore == 1){ - if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + for (cv in 1:ncv){ + + if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + + train.x <- x[labels!=cv,] + train.y <- y[labels!=cv] + test.x <- x[labels==cv,] + test.y <- y[labels==cv] + + cvfit <- linear_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress) + + cvmse[,cv] <- apply(test.x %*% cvfit$beta,2,function(x) sum((test.y-x)^2)/length(test.y)) + + } + + }else if(ncore > 1){ + + if (progress) warning(paste0("Using ", ncore ," core for cross-validation computation.")) - train.x <- x[labels!=cv,] - train.y <- y[labels!=cv] - test.x <- x[labels==cv,] - test.y <- y[labels==cv] + cl <- makeCluster(ncore) + registerDoParallel(cl) - cvfit <- linear_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress) + cvmse <- foreach(cv=1:ncv,.combine=cbind) %dopar% { + + train.x <- x[labels!=cv,] + train.y <- y[labels!=cv] + test.x <- x[labels==cv,] + test.y <- y[labels==cv] + + cvfit <- linear_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,FALSE) + + apply(test.x %*% cvfit$beta,2,function(x) sum((test.y-x)^2)/length(test.y)) + + } - cvmse[,cv] <- apply(test.x %*% cvfit$beta,2,function(x) sum((test.y-x)^2)/length(test.y)) + stopCluster(cl) } - mean.cvmse <- rowMeans(cvmse) se.cvmse <- apply(cvmse,1,function(x) sd(x)/sqrt(ncv)) @@ -164,6 +190,8 @@ LogRatioLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="gaussian") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(y=y,x=x.select.min) @@ -201,6 +229,8 @@ LogRatioLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="gaussian") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(y=y,x=x.select.min) step2fit <- suppressWarnings(step(glm(y~.,data=df_step2,family=gaussian),trace=0)) diff --git a/R/LogRatioLogisticLasso.R b/R/LogRatioLogisticLasso.R index cb62131..97c01f8 100644 --- a/R/LogRatioLogisticLasso.R +++ b/R/LogRatioLogisticLasso.R @@ -12,7 +12,8 @@ LogRatioLogisticLasso <- function(x, progress=TRUE, plot=TRUE, loop1=FALSE, - loop2=FALSE){ + loop2=FALSE, + ncore=1){ ptm <- proc.time() @@ -54,18 +55,44 @@ LogRatioLogisticLasso <- function(x, labels <- foldid } - for (cv in 1:ncv){ + if (ncore == 1){ - if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + for (cv in 1:ncv){ + + if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + + train.x <- x[labels!=cv,] + train.y <- y[labels!=cv] + test.x <- x[labels==cv,] + test.y <- y[labels==cv] + + cvfit <- 
logistic_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress,loop1,loop2) + + cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y- exp(x)/(1+exp(x)))^2)/length(test.y)) + + } + + }else if (ncore > 1){ - train.x <- x[labels!=cv,] - train.y <- y[labels!=cv] - test.x <- x[labels==cv,] - test.y <- y[labels==cv] + if (progress) warning(paste0("Using ", ncore ," core for cross-validation computation.")) - cvfit <- logistic_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,progress,loop1,loop2) + cl <- makeCluster(ncore) + registerDoParallel(cl) + + cvmse <- foreach(cv=1:ncv,.combine=cbind) %dopar% { + + train.x <- x[labels!=cv,] + train.y <- y[labels!=cv] + test.x <- x[labels==cv,] + test.y <- y[labels==cv] + + cvfit <- logistic_enet_al(train.x,train.y,length.lambda,mu,100,lambda,wcov,a,adjust,ncov,FALSE,loop1,loop2) + + apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y- exp(x)/(1+exp(x)))^2)/length(test.y)) + + } - cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y- exp(x)/(1+exp(x)))^2)/length(test.y)) + stopCluster(cl) } @@ -164,16 +191,14 @@ LogRatioLogisticLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="binomial") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(y=y,x=x.select.min) step2fit <- suppressWarnings(step(glm(y~.,data=df_step2,family=binomial),trace=0)) vars <- as.numeric(sapply(names(step2fit$coefficients),function(x) strsplit(x,split = "[.]")[[1]][2])) - if (ncol(idxs) == 1 & length(vars) == 2){ - vars = 1 - } - if (is.null(ncol(idxs))){ if (length(vars) == 2){ selected <- idxs @@ -196,7 +221,6 @@ LogRatioLogisticLasso <- function(x, if (length(which(ret$best.beta$add.1se!=0)) > 1){ idxs <- combn(which(ret$best.beta$add.1se!=0),2) - x.select.min <- matrix(NA,nrow=n,ncol=ncol(idxs)) for (k in 1:ncol(idxs)){ x.select.min[,k] <- x[,idxs[1,k]] - x[,idxs[2,k]] @@ -206,6 +230,8 @@ LogRatioLogisticLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=y,type.measure = "mse",family="binomial") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(y=y,x=x.select.min) diff --git a/R/LogRatioTDCoxLasso.R b/R/LogRatioTDCoxLasso.R index 79cacdf..7d24508 100644 --- a/R/LogRatioTDCoxLasso.R +++ b/R/LogRatioTDCoxLasso.R @@ -12,7 +12,8 @@ LogRatioTDCoxLasso <- function(x, step2=FALSE, progress=TRUE, plot=TRUE, - mcv="Deviance"){ + mcv="Deviance", + ncore=1){ ptm <- proc.time() @@ -42,7 +43,7 @@ LogRatioTDCoxLasso <- function(x, sfun = d - expect # hfun <- expect - sapply(t,function(x) sum(1/denomj[which(tj <= x)]^2)) + 1e-8 # hessian # z <- sfun/hfun - + if (a > 0){ lambda0 <- max(abs(t(sfun) %*% x))/(a*n) }else if (a == 0){ @@ -82,66 +83,122 @@ LogRatioTDCoxLasso <- function(x, # labels <- caret::createFolds(factor(d),k=ncv) - for (cv in 1:ncv){ - - if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) - - # train.x <- x[-labels[[cv]],] - # train.d <- d[-labels[[cv]]] - # train.t <- t[-labels[[cv]]] - # test.x <- x[labels[[cv]],] - # test.d <- d[labels[[cv]]] - # test.t <- t[labels[[cv]]] - - 
train.x <- x[labels!=cv,] - train.d <- d[labels!=cv] - train.t0 <- t0[labels!=cv] - train.t1 <- t1[labels!=cv] - train.tt <- y[labels!=cv,1:2] - - test.x <- x[labels==cv,] - test.d <- d[labels==cv] - test.t0 <- t0[labels==cv] - test.t1 <- t1[labels==cv] - test.tt <- y[labels==cv,1:2] + if (ncore == 1){ - cv.devnull <- 0 - train.tj <- sort(train.t1[train.d==1]) - for (j in 1:length(train.tj)){ - cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1])) + for (cv in 1:ncv){ + + if (progress) cat(paste0("Algorithm running for cv dataset ",cv," out of ",ncv,": \n")) + + # train.x <- x[-labels[[cv]],] + # train.d <- d[-labels[[cv]]] + # train.t <- t[-labels[[cv]]] + # test.x <- x[labels[[cv]],] + # test.d <- d[labels[[cv]]] + # test.t <- t[labels[[cv]]] + + train.x <- x[labels!=cv,] + train.d <- d[labels!=cv] + train.t0 <- t0[labels!=cv] + train.t1 <- t1[labels!=cv] + train.tt <- y[labels!=cv,1:2] + + test.x <- x[labels==cv,] + test.d <- d[labels==cv] + test.t0 <- t0[labels==cv] + test.t1 <- t1[labels==cv] + test.tt <- y[labels==cv,1:2] + + cv.devnull <- 0 + train.tj <- sort(train.t1[train.d==1]) + for (j in 1:length(train.tj)){ + cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1])) + } + cv.devnull <- 2*cv.devnull + + cvfit <- cox_timedep_enet_al(train.x,train.t0,train.t1,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress) + + cv.devnull <- 0 + loglik <- rep(0,length(lidx)) + linprod <- test.x %*% cvfit$beta + + if (mcv == "Deviance"){ + + test.tj <- sort(test.t1[test.d==1]) + for (j in 1:length(test.tj)){ + cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j])) + if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){ + cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8) + }else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){ + cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8) + } + #- log(accu(link(widx))+1e-8) + } + + cvdevnull[cv] <- 2*cv.devnull + cvdev[,cv] <- -2*cvdev[,cv] + + } + + # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y)) + } - cv.devnull <- 2*cv.devnull - cvfit <- cox_timedep_enet_al(train.x,train.t0,train.t1,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,progress) + }else if (ncore > 1){ - cv.devnull <- 0 - loglik <- rep(0,length(lidx)) - linprod <- test.x %*% cvfit$beta + if (progress) warning(paste0("Using ", ncore ," core for cross-validation computation.")) - if (mcv == "Deviance"){ + cl <- makeCluster(ncore) + registerDoParallel(cl) + + cvdev <- foreach(cv=1:ncv,.combine=cbind) %dopar% { + + train.x <- x[labels!=cv,] + train.d <- d[labels!=cv] + train.t0 <- t0[labels!=cv] + train.t1 <- t1[labels!=cv] + train.tt <- y[labels!=cv,1:2] + + test.x <- x[labels==cv,] + test.d <- d[labels==cv] + test.t0 <- t0[labels==cv] + test.t1 <- t1[labels==cv] + test.tt <- y[labels==cv,1:2] + + cv.devnull <- 0 + train.tj <- sort(train.t1[train.d==1]) + for (j in 1:length(train.tj)){ + cv.devnull <- cv.devnull + log(sum(train.t1 >= train.tj[1] & train.t0 < train.tj[1])) + } + cv.devnull <- 2*cv.devnull + + cvfit <- cox_timedep_enet_al(train.x,train.t0,train.t1,train.d,train.tj,length(lidx),mu,100,lambda[lidx],wcov,a,adjust,ncov,cv.devnull,FALSE) + + 
cv.dev <- rep(0,length(lidx)) + linprod <- test.x %*% cvfit$beta test.tj <- sort(test.t1[test.d==1]) for (j in 1:length(test.tj)){ - cv.devnull <- cv.devnull + log(sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j])) if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) > 1){ - cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] - log(colSums(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],])) + 1e-8) }else if (sum(test.t1 >= test.tj[j] & test.t0 < test.tj[j]) == 1){ - cvdev[,cv] <- cvdev[,cv] + linprod[test.t1 == test.tj[j],] - + cv.dev <- cv.dev + linprod[test.t1 == test.tj[j],] - log(exp(linprod[test.t1 >= test.tj[j] & test.t0 < test.tj[j],]) + 1e-8) } - #- log(accu(link(widx))+1e-8) } - cvdevnull[cv] <- 2*cv.devnull - cvdev[,cv] <- -2*cvdev[,cv] - + cv.dev <- -2*cv.dev + cv.dev } - # cvmse[,cv] <- apply(cbind(1,test.x) %*% rbind(t(cvfit$beta0),cvfit$beta),2,function(x) sum((test.y - exp(x)/(1+exp(x)))^2)/length(test.y)) + stopCluster(cl) } + + mean.cvdev <- rowMeans(cvdev) se.cvdev <- apply(cvdev,1,function(x) sd(x)/sqrt(ncv)) @@ -233,6 +290,8 @@ LogRatioTDCoxLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),type.measure = "deviance",family="cox") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min) @@ -267,6 +326,8 @@ LogRatioTDCoxLasso <- function(x, stepglmnet <- cv.glmnet(x=x.select.min,y=Surv(t0,t1,d),type.measure = "deviance",family="cox") x.select.min <- x.select.min[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] idxs <- idxs[,which(stepglmnet$glmnet.fit$beta[,stepglmnet$index[1]]!=0)] + }else{ + idxs <- as.vector(idxs) } df_step2 <- data.frame(t0=t0,t1=t1,d=d,x=x.select.min) diff --git a/R/coxsplit.R b/R/coxsplit.R index 7d6bf9f..bf9ea21 100644 --- a/R/coxsplit.R +++ b/R/coxsplit.R @@ -28,5 +28,14 @@ coxsplitss=function(y, id, nfolds){ foldid <- full$foldid + return(foldid) +} + +fgfoldid=function(id, foldid){ + idfoldid <- data.frame(id=unique(id),foldid=foldid) + yfoldid <- data.frame(id=id) + mergedid <- yfoldid %>% left_join(idfoldid,by="id") + + foldid <- mergedid$foldid return(foldid) } \ No newline at end of file diff --git a/R/simu.R b/R/simu.R index e4172b7..f735798 100644 --- a/R/simu.R +++ b/R/simu.R @@ -172,7 +172,7 @@ simu <- function(n = 100, p.cif = 0.66 lambda <- exp(eta) cl=0.19 - cu=1.09 + cu=10 P1 <- 1-(1-p.cif)^(lambda) epsilon <- rep(0,n) @@ -195,10 +195,9 @@ simu <- function(n = 100, #generate censoring time c <- runif(n,cl,cu) #observed time - t <- t0*I(t0<=c) + c*I(t0>c) - + t <- ifelse(t0 == Inf, c ,t0*I(t0<=c) + c*I(t0>c)) # outcome - d <- 0*I(t == c) + epsilon*I(t < c) + d <- ifelse(t0 == Inf, 0, 0*I(t == c) + epsilon*I(t < c)) ret <- list(xcount=xcount,x=xobs,t=t,d=d,beta=c(beta,rep(0,p-weak-strong)),idx=true_set) diff --git a/README.Rmd b/README.Rmd index 200a0ec..6ce2598 100644 --- a/README.Rmd +++ b/README.Rmd @@ -24,11 +24,13 @@ knitr::opts_chunk$set( The `FLORAL` package is an open-source computational tool to perform log-ratio lasso regression modeling and compositional feature selection for continuous, binary, time-to-event, and competing risk outcomes. 
The proposed method adapts the augmented Lagrangian algorithm for a zero-sum constraint optimization problem while enabling a two-stage screening process for extended false-positive control. -The associated preprint for `FLORAL` is available on [bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599v1). +The associated preprint for `FLORAL` is available on [bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599). -## Installation +## System requirements and installation -You can install `FLORAL` with the following code. +The current version of `FLORAL` (0.2.0) was built under R version 4.1.1. R package dependencies can be found in the `DESCRIPTION` file. + +You can install `FLORAL` with the following code. The installation typically completes within a few minutes. ``` r install.packages("FLORAL") @@ -43,9 +45,9 @@ devtools::install_github("vdblab/FLORAL") ## Example -Here is a toy example for linear regression with 10-fold cross-validation for a simulated data with 50 samples and 100 compositional features. Option `progress=TRUE` can be used to show the progress bar of the running algorithm. +Here is a toy example for linear regression with 10-fold cross-validation for a simulated dataset with 50 samples and 100 compositional features. Option `progress=TRUE` can be used to show the progress bar of the running algorithm. -The data simulation procedure is described in the preprint. +The data simulation procedure is described in the preprint. The expected run time for the following demo is about a minute. ```{r example} set.seed(23420) @@ -99,10 +101,14 @@ dat.fg <- simu(n=50,p=100,model="finegray") fit.fg <- FLORAL(dat.cox$xcount,survival::Surv(dat.cox$t,dat.cox$d,type="mstate"),family="finegray",ncv=10,progress=FALSE,step2=FALSE) ``` +## Repository for Reproducibility + +Reproducible code for the analyses reported in the manuscript can be found at [this repository](https://github.com/vdblab/FLORAL-analysis). + ## Contributing The `FLORAL` package is jointly managed by [MSKCC Biostatistics service](https://www.mskcc.org/departments/epidemiology-biostatistics/biostatistics) and [the Marcel van den Brink Lab](https://vandenbrinklab.org/). Please note that the `FLORAL` project is released with a [Contributor Code of Conduct](https://github.com/vdblab/FLORAL/blob/master/.github/CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. Thank you to all contributors! ## Reference -Fei T, Funnell T, Waters NR, Raj SS, Devlin SM, Dai A, Miltiadous O, Shouval R, Lv M, Peled JU, Ponce DM, Perales M-A, Gönen M, van den Brink MRM, Scalable Log-ratio Lasso Regression Enhances Microbiome Feature Selection for Predictive Models, bioRxiv 2023.05.02.538599; doi: https://doi.org/10.1101/2023.05.02.538599. \ No newline at end of file +Fei T, Funnell T, Waters NR, Raj SS, Sadeghi K, Dai A, Miltiadous O, Shouval R, Lv M, Peled JU, Ponce DM, Perales M-A, Gönen M, van den Brink MRM, Enhanced Feature Selection for Microbiome Data using FLORAL: Scalable Log-ratio Lasso Regression, bioRxiv 2023.05.02.538599; doi: https://doi.org/10.1101/2023.05.02.538599. \ No newline at end of file diff --git a/README.md b/README.md index 88c9057..785512b 100644 --- a/README.md +++ b/README.md @@ -20,11 +20,15 @@ constraint optimization problem while enabling a two-stage screening process for extended false-positive control. The associated preprint for `FLORAL` is available on -[bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599v1). 
+[bioRxiv](https://www.biorxiv.org/content/10.1101/2023.05.02.538599). -## Installation +## System requirements and installation -You can install `FLORAL` with the following code. +The current version of `FLORAL` (0.2.0) was built under R version 4.1.1. R +package dependencies can be found in the `DESCRIPTION` file. + +You can install `FLORAL` with the following code. The installation +typically completes within a few minutes. ``` r install.packages("FLORAL") @@ -45,7 +49,8 @@ cross-validation for a simulated data with 50 samples and 100 compositional features. Option `progress=TRUE` can be used to show the progress bar of the running algorithm. -The data simulation procedure is described in the preprint. +The data simulation procedure is described in the preprint. The expected +run time for the following demo is about a minute. ``` r set.seed(23420) @@ -62,10 +67,10 @@ coefficients, use `fit$pmse` or `fit$pcoef`: To view selected compositional features, use `fit$selected.feature`, where features are sorted by their names. Features under `min` and `1se` -correspond to penalty parameter *λ*min and *λ*1se, -respectively. Features under `min.2stage` and `1se.2stage` are obtained -after applying 2-stage filtering based on features under `min` and -`1se`, respectively. +correspond to penalty parameter $\lambda_{\min}$ and +$\lambda_{\text{1se}}$, respectively. Features under `min.2stage` and +`1se.2stage` are obtained after applying 2-stage filtering based on +features under `min` and `1se`, respectively. We recommend interpreting the selected compositional features as potential predictive markers to the outcome in the regression model in @@ -110,14 +115,14 @@ fit$step2.ratios #> [6] "taxa5/taxa8" "taxa6/taxa7" "taxa6/taxa9" "taxa7/taxa10" "taxa9/taxa32" #> #> $min.idx -#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] -#> [1,] 1 1 1 2 3 3 5 6 7 7 8 9 9 -#> [2,] 13 20 84 5 8 92 8 9 10 79 60 32 92 +#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] +#> [1,] NA 1 1 1 2 3 3 5 6 7 7 8 9 9 +#> [2,] NA 13 20 84 5 8 92 8 9 10 79 60 32 92 #> #> $`1se.idx` -#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] -#> [1,] 1 1 1 2 3 5 6 6 7 9 -#> [2,] 13 20 84 5 8 8 7 9 10 32 +#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] +#> [1,] NA 1 1 1 2 3 5 6 6 7 9 +#> [2,] NA 13 20 84 5 8 8 7 9 10 32 ``` More detailed interpretations can be obtained for the selected @@ -153,6 +158,12 @@ dat.fg <- simu(n=50,p=100,model="finegray") fit.fg <- FLORAL(dat.cox$xcount,survival::Surv(dat.cox$t,dat.cox$d,type="mstate"),family="finegray",ncv=10,progress=FALSE,step2=FALSE) ``` +## Repository for Reproducibility + +Reproducible code for the analyses reported in the manuscript +can be found at [this +repository](https://github.com/vdblab/FLORAL-analysis). + ## Contributing The `FLORAL` package is jointly managed by [MSKCC Biostatistics service](https://www.mskcc.org/departments/epidemiology-biostatistics/biostatistics) and [the Marcel van den Brink Lab](https://vandenbrinklab.org/). Please note that the `FLORAL` project is released with a [Contributor Code of Conduct](https://github.com/vdblab/FLORAL/blob/master/.github/CODE_OF_CONDUCT.md). By contributing to this project, you agree to abide by its terms. Thank you to all contributors! ## Reference -Fei T, Funnell T, Waters NR, Raj SS, Devlin SM, Dai A, Miltiadous O, +Fei T, Funnell T, Waters NR, Raj SS, Sadeghi K, Dai A, Miltiadous O, Shouval R, Lv M, Peled JU, Ponce DM, Perales M-A, Gönen M, van den Brink -MRM, Scalable Log-ratio Lasso Regression Enhances Microbiome Feature -Selection for Predictive Models, bioRxiv 2023.05.02.538599; doi: +MRM, Enhanced Feature Selection for Microbiome Data using FLORAL: +Scalable Log-ratio Lasso Regression, bioRxiv 2023.05.02.538599; doi: <https://doi.org/10.1101/2023.05.02.538599>. 
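For reference, the `ncore` argument introduced throughout this diff can be exercised directly from the README's linear-regression demo. This is a minimal sketch based on the signatures shown in the diff; `ncore = 2` is an arbitrary illustrative choice, not a recommendation.

``` r
library(FLORAL)

set.seed(23420)
dat <- simu(n = 50, p = 100, model = "linear")

# Fold assignments are computed before the parallel dispatch, so ncore
# only changes how the per-fold fits are scheduled across workers.
fit <- FLORAL(dat$xcount, dat$y, family = "gaussian",
              ncv = 10, ncore = 2, progress = FALSE, step2 = TRUE)
```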
diff --git a/inst/WORDLIST b/inst/WORDLIST index 11274b8..5b0f824 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -4,7 +4,6 @@ CRC Compositional DM Dai -Devlin Gönen Interpretable JU @@ -17,6 +16,8 @@ Miltiadous Peled Perales RAtio +Reproducibility +Sadeghi Scalable Shouval al @@ -35,4 +36,3 @@ ncv preprint reproducibility se -λ diff --git a/man/FLORAL.Rd b/man/FLORAL.Rd index 26cdd14..156520e 100644 --- a/man/FLORAL.Rd +++ b/man/FLORAL.Rd @@ -19,6 +19,7 @@ FLORAL( a = 1, mu = 1, ncv = 5, + ncore = 1, intercept = FALSE, foldid = NULL, step2 = TRUE, @@ -55,6 +56,8 @@ FLORAL( \item{ncv}{Folds of cross-validation. Use \code{NULL} if cross-validation is not wanted.} +\item{ncore}{Number of cores used for parallel computation during cross-validation. Default is 1.} + \item{intercept}{\code{TRUE} or \code{FALSE}, indicating whether an intercept should be estimated.} \item{foldid}{A vector of fold indicator. Default is \code{NULL}.}
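The parallel branches added to LogRatioLasso.R, LogRatioLogisticLasso.R, LogRatioCoxLasso.R, LogRatioTDCoxLasso.R, and LogRatioFGLasso.R all follow the same `makeCluster()` / `registerDoParallel()` / `foreach() %dopar%` / `stopCluster()` template. Below is a self-contained sketch of that template, with a plain ridge fit standing in for the package's internal `*_enet_al()` solvers.

``` r
library(doParallel)
library(foreach)

set.seed(1)
ncv <- 5; n <- 100; p <- 10
x <- matrix(rnorm(n * p), n, p)
y <- rnorm(n)
lambda <- c(0.1, 1, 10)                       # toy penalty path
labels <- sample(rep(1:ncv, length.out = n))  # fold label per row

cl <- makeCluster(2)
registerDoParallel(cl)

# One fold per task; .combine = cbind stacks each fold's per-lambda
# error vector into the same matrix layout the serial loop fills.
cvmse <- foreach(cv = 1:ncv, .combine = cbind) %dopar% {
  xtr <- x[labels != cv, ]; ytr <- y[labels != cv]
  xte <- x[labels == cv, ]; yte <- y[labels == cv]
  beta <- sapply(lambda, function(l)           # ridge stand-in fitter
    solve(crossprod(xtr) + l * diag(p), crossprod(xtr, ytr)))
  colMeans((yte - xte %*% beta)^2)             # held-out error per lambda
}

stopCluster(cl)
rowMeans(cvmse)  # mean CV error per lambda, as in the serial branch
```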
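The new `fgfoldid()` helper in R/coxsplit.R broadcasts subject-level fold labels onto the row-expanded Fine-Gray data, which is what makes a user-supplied `foldid` usable after `finegray()` turns one subject into several `(tstart, tstop]` rows. Here is the helper as it appears in the diff, applied to an invented toy input to show the mapping:

``` r
library(dplyr)

fgfoldid <- function(id, foldid){
  idfoldid <- data.frame(id = unique(id), foldid = foldid)
  yfoldid <- data.frame(id = id)
  mergedid <- yfoldid %>% left_join(idfoldid, by = "id")
  foldid <- mergedid$foldid
  return(foldid)
}

id <- c("A", "A", "B", "C", "C", "C")  # row-level ids after expansion
foldid <- c(1, 2, 1)                   # one label per unique subject
fgfoldid(id, foldid)
#> [1] 1 1 2 1 1 1
```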
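The `Surv(tstart, tstop, dd, type = "mstate")` change in R/FLORAL.R matters because `survival::finegray()` requires a multi-state survival object to recognize competing event types. A hedged toy illustration of the fixed call follows; the data frame and its coding are invented for the example, and the package itself passes `etype = failcode` (now defaulting to the first failure type).

``` r
library(survival)

# Toy one-row-per-subject data; the first factor level is censoring.
xy <- data.frame(
  id     = 1:6,
  tstart = 0,
  tstop  = c(2, 3, 5, 4, 6, 7),
  dd     = factor(c("event1", "censor", "event2", "event1", "event2", "censor"),
                  levels = c("censor", "event1", "event2")),
  x1     = rnorm(6)
)

# Weighted data for the event of interest, as used for the Fine-Gray fit.
df_FG <- finegray(Surv(tstart, tstop, dd, type = "mstate") ~ ., id = id,
                  data = xy, etype = "event1", timefix = FALSE)
head(df_FG[, c("fgstart", "fgstop", "fgstatus", "fgwt")])
```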
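The `ifelse(t0 == Inf, ...)` guards added in R/simu.R close a quiet arithmetic trap in the competing-risks simulation: for subjects whose latent event time `t0` is infinite, the old expression `t0*I(t0<=c) + c*I(t0>c)` evaluates `Inf * 0`, which is `NaN` in R rather than 0, so `t` and `d` became `NaN`/`NA` instead of a censored observation. A two-line check (the censoring time is renamed `cc` here to avoid shadowing base `c()`):

``` r
t0 <- Inf   # subject who never experiences any event
cc <- 0.5   # censoring time, as drawn from runif(n, cl, cu)

t0 * I(t0 <= cc) + cc * I(t0 > cc)                        # NaN: Inf * 0 is NaN
ifelse(t0 == Inf, cc, t0 * I(t0 <= cc) + cc * I(t0 > cc)) # 0.5: censored at cc
```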
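Finally, the repeated `}else{ idxs <- as.vector(idxs) }` additions across the step-2 blocks handle the edge case where exactly one log-ratio survives screening: `combn()` then yields a 2 x 1 matrix, while the downstream code distinguishes the single-ratio case by `is.null(ncol(idxs))`. A small illustration of why the coercion is needed:

``` r
idxs <- combn(c(3, 7), 2)  # 2 x 1 matrix when only one ratio remains
ncol(idxs)                 # 1, so the multi-ratio branch would be taken
idxs <- as.vector(idxs)    # c(3, 7)
is.null(ncol(idxs))        # TRUE: now routed to the single-ratio branch
```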