
Commit

Merge pull request #261 from jr-leary7/dev
Dev
jr-leary7 authored Oct 25, 2024
2 parents 21fcb73 + 621866e commit 6f385a9
Showing 13 changed files with 73 additions and 144 deletions.
3 changes: 0 additions & 3 deletions NAMESPACE
@@ -55,19 +55,16 @@ importFrom(dplyr,contains)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,if_else)
importFrom(dplyr,inner_join)
importFrom(dplyr,lag)
importFrom(dplyr,lead)
importFrom(dplyr,mutate)
importFrom(dplyr,ntile)
importFrom(dplyr,pull)
importFrom(dplyr,relocate)
importFrom(dplyr,rename)
importFrom(dplyr,row_number)
importFrom(dplyr,rowwise)
importFrom(dplyr,sample_frac)
importFrom(dplyr,select)
importFrom(dplyr,slice_head)
importFrom(dplyr,summarise)
1 change: 1 addition & 0 deletions NEWS.md
@@ -7,6 +7,7 @@
+ Minor bug fixes.
+ Further improved matrix operations using `RcppEigen`.
+ Improved support for `cell_data_set` objects from `monocle3`.
+ Sped up GLMM mode.

# Changes in v0.8.4

2 changes: 1 addition & 1 deletion R/fitGLMM.R
@@ -2,7 +2,7 @@
#'
#' @name fitGLMM
#' @author Jack R. Leary
#' @description Fits a negative binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly space across quantiles. The resulting model can output subject-specific and population-level fitted values.
#' @description Fits a negative-binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly spaced across quantiles. The resulting model can output subject-specific and population-level fitted values.
#' @importFrom purrr map map_dbl map_dfr pmap_dfc reduce
#' @importFrom dplyr mutate select if_else
#' @importFrom mpath glmregNB
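For readers unfamiliar with the second option in the description above, here is a minimal R sketch of a truncated power basis whose knots are evenly spaced across quantiles of pseudotime. The object names (`pseudotime`, `n_knots`) are invented for illustration and are not fitGLMM()'s internals.

# illustrative only -- not the package's internal implementation
set.seed(312)
pseudotime <- runif(200)
n_knots <- 4
probs <- seq(0, 1, length.out = n_knots + 2)[-c(1, n_knots + 2)]  # interior quantiles
knots <- unname(stats::quantile(pseudotime, probs = probs))
tp_basis <- sapply(knots, function(k) pmax(pseudotime - k, 0))    # hinge functions (x - k)_+
colnames(tp_basis) <- paste0("hinge_", seq_len(n_knots))
head(tp_basis)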
15 changes: 9 additions & 6 deletions R/marge2.R
@@ -8,7 +8,7 @@
#' @description MARS fitting function for negative binomial generalized linear models (GLMs) & generalized estimating equations (GEEs).
#' @import glm2
#' @import magrittr
#' @importFrom dplyr mutate ntile group_by sample_frac
#' @importFrom dplyr mutate
#' @importFrom geeM geem
#' @importFrom MASS glm.nb negative.binomial theta.mm
#' @importFrom utils tail
@@ -24,7 +24,7 @@
#' @param cor.structure If \code{is.gee = TRUE}, a string specifying the desired correlation structure for the NB GEE. Defaults to "ar1".
#' @param sandwich.var (Optional) Should the sandwich variance estimator be used instead of the model-based estimator? Defaults to FALSE.
#' @param approx.knot (Optional) Should the set of candidate knots be subsampled in order to speed up computation? This has little effect on the final fit, but can improve computation time somewhat. Defaults to TRUE.
#' @param n.knot.max (Optional) The maximum number of candidate knots to consider. Uses random sampling (don't worry, a random seed is set internally) to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.
#' @param n.knot.max (Optional) The maximum number of candidate knots to consider. Uses uniform sampling to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.
#' @param glm.backend (Optional) Character specifying which GLM-fitting backend should be used. Must be one of "MASS" or "speedglm". Defaults to "MASS".
#' @param tols_score (Optional) The set tolerance for monitoring the convergence for the difference in score statistics between the parent and candidate model (this is the lack-of-fit criterion used for MARGE). Defaults to 0.00001.
#' @param minspan (Optional) A set minimum span value. Defaults to NULL.
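A rough sketch of the `n.knot.max` behaviour documented above: reduce a large candidate-knot set to at most 50 values chosen uniformly across the candidates. This is illustrative only; `marge2()`'s internal selection may differ in detail.

# illustrative sketch, not marge2() internals
candidate_knots <- sort(unique(round(runif(500), 4)))
n_knot_max <- 50
if (length(candidate_knots) > n_knot_max) {
  probs <- seq(0, 1, length.out = n_knot_max)
  candidate_knots <- unname(unique(stats::quantile(candidate_knots, probs = probs, type = 1)))
}
length(candidate_knots)  # now at most 50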
@@ -180,9 +180,7 @@ marge2 <- function(X_pred = NULL,
score_knot_both_add_mat <- NULL
score_knot_one_int_mat <- NULL
score_knot_one_add_mat <- NULL

int.count1 <- 0

in.set <- ifelse(ncol(B) > 1, sum(!var_name_vec %in% var_name), 0)

for (t in seq_along(X_red)) {
@@ -811,6 +809,12 @@ marge2 <- function(X_pred = NULL,
# Some final model output, WIC, GCV etc.
B_final <- as.matrix(B[, colnames(B) %in% cnames_2[[which.min(WIC_vec_2)]]])
model_df <- as.data.frame(B_final)
# remove duplicate hinge functions if present
if (length(unique(colnames(model_df))) != ncol(model_df)) {
unique_cols <- which(!duplicated(as.list(model_df)))
B_final <- B_final[, unique_cols]
model_df <- model_df[, unique_cols]
}
if (ncol(model_df) == 1) {
colnames(model_df) <- c("Intercept")
model_formula <- "Y ~ -1 + Intercept"
@@ -825,8 +829,7 @@
}
if (!is.glmm) {
if (!is.null(Y.offset)) {
model_df <- dplyr::mutate(model_df,
cell_offset = Y.offset)
model_df <- dplyr::mutate(model_df, cell_offset = Y.offset)
model_formula <- paste0(model_formula, " + ", "offset(log(1 / cell_offset))") # this is correct i promise
}
model_formula <- stats::as.formula(model_formula)
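The `offset(log(1 / cell_offset))` term above can be read as dividing each cell's expected count by its offset: with this term the NB mean satisfies mu_i = exp(X_i beta) / cell_offset_i. A hedged, self-contained illustration with made-up data (what `Y.offset` contains is up to the caller):

# toy demonstration only
set.seed(312)
toy <- data.frame(y = stats::rnbinom(100, mu = 5, size = 2),
                  x = stats::rnorm(100),
                  cell_offset = stats::runif(100, 0.5, 2))
fit <- MASS::glm.nb(y ~ x + offset(log(1 / cell_offset)), data = toy)
head(exp(stats::predict(fit)))  # fitted means, already scaled by 1 / cell_offset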
60 changes: 16 additions & 44 deletions R/score_fun_gee.R
@@ -57,56 +57,28 @@ score_fun_gee <- function(Y = NULL,
AWA.est_i <- AWA.est_list[[i]]
J2_i <- J2_list[[i]]
Sigma2_i <- Sigma2_list[[i]]
D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]),
B = XA[(sum(n_vec1[seq(i)]) + 1):k, ],
n_cores = 1)
D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]), B = XA[(sum(n_vec1[seq(i)]) + 1):k, ])
D_est_i_transpose <- t(D.est_i)
J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose,
B = t(J2_i),
n_cores = 1)
Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose,
B = t(Sigma2_i),
n_cores = 1)
B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose,
B = VS.est_i,
n_cores = 1)
temp_prod <- eigenMapMatMult(A = D_est_i_transpose,
B = AWA.est_i,
n_cores = 1)
Sigma22 <- Sigma22 + eigenMapMatMult(A = temp_prod,
B = D.est_i,
n_cores = 1)
J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose, B = t(J2_i))
Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose, B = t(Sigma2_i))
B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose, B = VS.est_i)
temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = AWA.est_i)
Sigma22 <- Sigma22 + eigenMapMatMult(A = temp_prod, B = D.est_i)
}
temp_prod_1 <- eigenMapMatMult(A = J21,
B = J11.inv,
n_cores = 1)
temp_prod_1 <- eigenMapMatMult(A = temp_prod_1,
B = t(Sigma21),
n_cores = 1)
temp_prod_2 <- eigenMapMatMult(A = Sigma21,
B = J11.inv,
n_cores = 1)
temp_prod_1 <- eigenMapMatMult(A = J21, B = J11.inv)
temp_prod_1 <- eigenMapMatMult(A = temp_prod_1, B = t(Sigma21))
temp_prod_2 <- eigenMapMatMult(A = Sigma21, B = J11.inv)
J21_transpose <- t(J21)
temp_prod_2 <- eigenMapMatMult(A = temp_prod_2,
B = J21_transpose,
n_cores = 1)
temp_prod_3 <- eigenMapMatMult(A = J21,
B = JSigma11,
n_cores = 1)
temp_prod_3 <- eigenMapMatMult(A = temp_prod_3,
B = J21_transpose,
n_cores = 1)
temp_prod_2 <- eigenMapMatMult(A = temp_prod_2, B = J21_transpose)
temp_prod_3 <- eigenMapMatMult(A = J21, B = JSigma11)
temp_prod_3 <- eigenMapMatMult(A = temp_prod_3, B = J21_transpose)
Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3
Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1L) }, silent = TRUE)
Sigma_inv <- try({ eigenMapMatrixInvert(Sigma) }, silent = TRUE)
if (inherits(Sigma_inv, "try-error")) {
Sigma_inv <- eigenMapPseudoInverse(Sigma, n_cores = 1L)
Sigma_inv <- eigenMapPseudoInverse(Sigma)
}
temp_prod <- eigenMapMatMult(A = t(B.est),
B = Sigma_inv,
n_cores = 1)
score <- eigenMapMatMult(A = temp_prod,
B = B.est,
n_cores = 1)
temp_prod <- eigenMapMatMult(A = t(B.est), B = Sigma_inv)
score <- eigenMapMatMult(A = temp_prod, B = B.est)
}
res <- list(score = score)
return(res)
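The chain of `eigenMapMatMult()` calls above assembles the GEE score statistic score = B' Sigma^-1 B, with Sigma = Sigma22 - J21 J11^-1 Sigma21' - Sigma21 J11^-1 J21' + J21 JSigma11 J21'. A base-R restatement on invented matrices, just to make the algebra visible (the C++ helpers are fast replacements for `%*%`, `solve()`, and a pseudoinverse fallback):

# invented stand-in matrices; shapes only need to be conformable
p <- 3
Sigma22 <- diag(p);  J21 <- matrix(0.1, p, p);  Sigma21 <- matrix(0.2, p, p)
J11.inv <- diag(p);  JSigma11 <- diag(p);       B.est <- matrix(1, p, 1)
Sigma <- Sigma22 - J21 %*% J11.inv %*% t(Sigma21) -
         Sigma21 %*% J11.inv %*% t(J21) + J21 %*% JSigma11 %*% t(J21)
Sigma_inv <- tryCatch(solve(Sigma), error = function(e) MASS::ginv(Sigma))
drop(t(B.est) %*% Sigma_inv %*% B.est)  # the score statistic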
32 changes: 9 additions & 23 deletions R/score_fun_glm.R
@@ -36,32 +36,18 @@ score_fun_glm <- function(Y = NULL,
VS.est_i <- unlist(VS.est_list)
A_list_i <- unlist(A_list)
B1_list_i <- unlist(B1_list)
B_list_i <- eigenMapMatMult(A = B1_list_i,
B = XA,
n_cores = 1)
D_list_i <- eigenMapMatMult(A = t(XA),
B = (XA * c(mu.est^2 / V.est)),
n_cores = 1)
temp_prod <- eigenMapMatMult(A = t(B_list_i),
B = A_list_i,
n_cores = 1)
temp_prod <- eigenMapMatMult(A = temp_prod,
B = B_list_i,
n_cores = 1)
B_list_i <- eigenMapMatMult(A = B1_list_i, B = XA)
D_list_i <- eigenMapMatMult(A = t(XA), B = (XA * c(mu.est^2 / V.est)))
temp_prod <- eigenMapMatMult(A = t(B_list_i), B = A_list_i)
temp_prod <- eigenMapMatMult(A = temp_prod, B = B_list_i)
inv.XVX_22 <- D_list_i - temp_prod
B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i),
B = XA,
n_cores = 1)
XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1L) }, silent = TRUE)
B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), B = XA)
XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22) }, silent = TRUE)
if (inherits(XVX_22, "try-error")) {
XVX_22 <- eigenMapPseudoInverse(inv.XVX_22, n_cores = 1L)
XVX_22 <- eigenMapPseudoInverse(inv.XVX_22)
}
temp_prod <- eigenMapMatMult(A = B.est,
B = XVX_22,
n_cores = 1)
score <- eigenMapMatMult(A = temp_prod,
B = t(B.est),
n_cores = 1)
temp_prod <- eigenMapMatMult(A = B.est, B = XVX_22)
score <- eigenMapMatMult(A = temp_prod, B = t(B.est))
}
res <- list(score = score)
return(res)
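This function (and the GEE version above) shares the same try-then-pseudoinverse pattern: attempt a direct inverse, and fall back to a Moore-Penrose pseudoinverse when the matrix is singular. A base-R sketch of that pattern, using `MASS::ginv()` in place of the package's C++ helpers:

safe_invert <- function(M) {
  out <- try(solve(M), silent = TRUE)
  if (inherits(out, "try-error")) {
    out <- MASS::ginv(M)  # Moore-Penrose fallback for singular / ill-conditioned M
  }
  out
}
safe_invert(diag(2))                      # ordinary inverse
safe_invert(matrix(c(1, 1, 1, 1), 2, 2))  # singular -> pseudoinverse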
66 changes: 18 additions & 48 deletions R/stat_out_score_gee_null.R
@@ -66,52 +66,26 @@ stat_out_score_gee_null <- function(Y = NULL,
diag_sqrt_V_est <- diag(sqrt(V_est[temp_seq_n]),
nrow = n_vec[i],
ncol = n_vec[i])
temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est,
B = R_alpha,
n_cores = 1)
V_est_i <- eigenMapMatMult(A = temp_prod,
B = diag_sqrt_V_est,
n_cores = 1)
V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i, n_cores = 1L) }, silent = TRUE)
temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est, B = R_alpha)
V_est_i <- eigenMapMatMult(A = temp_prod, B = diag_sqrt_V_est)
V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i) }, silent = TRUE)
if (inherits(V_est_i_inv, "try-error")) {
V_est_i_inv <- eigenMapPseudoInverse(V_est_i, n_cores = 1L)
V_est_i_inv <- eigenMapPseudoInverse(V_est_i)
}
S_est_i <- t(Y)[temp_seq_n] - mu_est_sub
temp_prod <- eigenMapMatMult(A = S_est_i,
B = t(S_est_i),
n_cores = 1)
temp_prod <- eigenMapMatMult(A = V_est_i_inv,
B = temp_prod,
n_cores = 1)
AWA_est_i <- eigenMapMatMult(A = temp_prod,
B = V_est_i_inv,
n_cores = 1)
D_est_i <- eigenMapMatMult(A = diag(mu_est_sub,
nrow = n_vec[i],
ncol = n_vec[i]),
B = B_null[temp_seq_n, ],
n_cores = 1)
temp_prod <- eigenMapMatMult(A = S_est_i, B = t(S_est_i))
temp_prod <- eigenMapMatMult(A = V_est_i_inv, B = temp_prod)
AWA_est_i <- eigenMapMatMult(A = temp_prod, B = V_est_i_inv)
D_est_i <- eigenMapMatMult(A = diag(mu_est_sub, nrow = n_vec[i], ncol = n_vec[i]), B = B_null[temp_seq_n, ])
D_est_i_transpose <- t(D_est_i)
temp_prod <- eigenMapMatMult(A = D_est_i_transpose,
B = V_est_i_inv,
n_cores = 1)
J1_i <- eigenMapMatMult(A = temp_prod,
B = D_est_i,
n_cores = 1)
temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv)
J1_i <- eigenMapMatMult(A = temp_prod, B = D_est_i)
J11 <- J11 + J1_i
J2_i <- eigenMapMatMult(A = D_est_i_transpose,
B = V_est_i_inv,
n_cores = 1)
Sigma2_i <- eigenMapMatMult(A = D_est_i_transpose,
B = AWA_est_i,
n_cores = 1)
Sigma1_i <- eigenMapMatMult(A = Sigma2_i,
B = D_est_i,
n_cores = 1)
J2_i <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv)
Sigma2_i <- eigenMapMatMult(A = D_est_i_transpose, B = AWA_est_i)
Sigma1_i <- eigenMapMatMult(A = Sigma2_i, B = D_est_i)
Sigma11 <- Sigma11 + Sigma1_i
V_est_list_elem_2 <- eigenMapMatMult(A = V_est_i_inv,
B = S_est_i,
n_cores = 1)
V_est_list_elem_2 <- eigenMapMatMult(A = V_est_i_inv, B = S_est_i)
VS_est_list[[i]] <- V_est_list_elem_2
AWA_est_list[[i]] <- AWA_est_i
J2_list[[i]] <- J2_i
@@ -120,17 +94,13 @@
if (nrow(J11) == 1 && ncol(J11) == 1) {
J11_inv <- 1 / J11
} else {
J11_inv <- try({ eigenMapMatrixInvert(J11, n_cores = 1L) }, silent = TRUE)
J11_inv <- try({ eigenMapMatrixInvert(J11) }, silent = TRUE)
if (inherits(J11_inv, "try-error")) {
J11_inv <- eigenMapPseudoInverse(J11, n_cores = 1L)
J11_inv <- eigenMapPseudoInverse(J11)
}
}
temp_prod <- eigenMapMatMult(A = J11_inv,
B = Sigma11,
n_cores = 1)
JSigma11 <- eigenMapMatMult(A = temp_prod,
B = J11_inv,
n_cores = 1)
temp_prod <- eigenMapMatMult(A = J11_inv, B = Sigma11)
JSigma11 <- eigenMapMatMult(A = temp_prod, B = J11_inv)
res <- list(VS.est_list = VS_est_list,
AWA.est_list = AWA_est_list,
J2_list = J2_list,
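In the loop above, each subject's working covariance is built as V_i = A_i^(1/2) R(alpha) A_i^(1/2), where A_i holds the negative-binomial variances of that subject's cells and R(alpha) is the working correlation. A small numeric illustration with invented values:

# invented values for a single subject with three cells
V_est_sub <- c(2.0, 3.5, 1.2)                     # per-cell NB variances
R_alpha <- matrix(0.3, 3, 3); diag(R_alpha) <- 1  # e.g. an exchangeable working correlation
A_sqrt <- diag(sqrt(V_est_sub))
V_i <- A_sqrt %*% R_alpha %*% A_sqrt
V_i_inv <- solve(V_i)
round(V_i, 3)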
16 changes: 5 additions & 11 deletions R/stat_out_score_glm_null.R
@@ -28,23 +28,17 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) {
V_est <- mu_est * (1 + mu_est * sigma_est) # Type I NB variance = mu (1 + mu * sigma); sigma = 1 / theta
VS_est_list <- (Y - mu_est) / V_est
mu_V_diag <- diag(c(mu_est^2 / V_est))
temp_prod <- eigenMapMatMult(A = t(B_null),
B = mu_V_diag,
n_cores = 1)
A_list_inv <- eigenMapMatMult(A = temp_prod,
B = B_null,
n_cores = 1)
temp_prod <- eigenMapMatMult(A = t(B_null), B = mu_V_diag)
A_list_inv <- eigenMapMatMult(A = temp_prod, B = B_null)
if (ncol(A_list_inv) == 1 && nrow(A_list_inv) == 1) {
A_list <- 1 / A_list_inv
} else {
A_list <- try({ eigenMapMatrixInvert(A_list_inv, n_cores = 1L) }, silent = TRUE)
A_list <- try({ eigenMapMatrixInvert(A_list_inv) }, silent = TRUE)
if (inherits(A_list, "try-error")) {
A_list <- eigenMapPseudoInverse(A_list_inv, n_cores = 1L)
A_list <- eigenMapPseudoInverse(A_list_inv)
}
}
B1_list <- eigenMapMatMult(A = t(B_null),
B = mu_V_diag,
n_cores = 1)
B1_list <- eigenMapMatMult(A = t(B_null), B = mu_V_diag)
res <- list(VS.est_list = VS_est_list,
A_list = A_list,
B1_list = B1_list,
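A quick numeric check of the variance function noted in the comment above, Var(y) = mu * (1 + mu * sigma) with sigma = 1 / theta, which is the familiar mu + mu^2 / theta negative-binomial variance (values are arbitrary):

mu <- 4; theta <- 2; sigma <- 1 / theta
mu * (1 + mu * sigma)  # 12
mu + mu^2 / theta      # 12, the same quantity in the theta parameterization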
2 changes: 1 addition & 1 deletion man/fitGLMM.Rd

2 changes: 1 addition & 1 deletion man/marge2.Rd

6 changes: 4 additions & 2 deletions src/eigenMapInvert.cpp
@@ -3,8 +3,10 @@
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::export]]

Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores = 1){
Eigen::setNbThreads(n_cores);
Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map<Eigen::MatrixXd> A, int n_cores = 1) {
if (n_cores > 1) {
Eigen::setNbThreads(n_cores);
}
Eigen::MatrixXd B = A.llt().solve(Eigen::MatrixXd::Identity(A.rows(), A.cols()));
return B;
}
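The `A.llt().solve(identity)` call above is a Cholesky-based inverse, which is valid for symmetric positive-definite matrices such as the information- and covariance-type matrices it is applied to here. The same idea in base R, for intuition:

set.seed(312)
M <- crossprod(matrix(rnorm(9), 3, 3)) + diag(3)  # symmetric positive-definite by construction
all.equal(chol2inv(chol(M)), solve(M))            # TRUE up to numerical error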
4 changes: 3 additions & 1 deletion src/eigenMapMatMult.cpp
@@ -6,7 +6,9 @@
Eigen::MatrixXd eigenMapMatMult(const Eigen::Map<Eigen::MatrixXd> A,
const Eigen::Map<Eigen::MatrixXd> B,
int n_cores = 1) {
Eigen::setNbThreads(n_cores);
if (n_cores > 1) {
Eigen::setNbThreads(n_cores);
}
Eigen::MatrixXd C;
C.noalias() = A * B;
return C;
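A hedged way to sanity-check this helper outside the package: compile the file standalone with Rcpp and compare against base `%*%`. This assumes Rcpp and RcppEigen are installed and that the working directory is the repository root; inside the package the function is compiled with the package sources and called internally.

Rcpp::sourceCpp("src/eigenMapMatMult.cpp")
A <- matrix(rnorm(20), 4, 5); B <- matrix(rnorm(15), 5, 3)
all.equal(eigenMapMatMult(A, B), A %*% B)  # TRUE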
8 changes: 5 additions & 3 deletions src/eigenMapPseudoInverse.cpp
@@ -3,10 +3,12 @@
// [[Rcpp::depends(RcppEigen)]]
// [[Rcpp::export]]

Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map<Eigen::MatrixXd> A,
double tolerance = 1e-6,
Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map<Eigen::MatrixXd> A,
double tolerance = 1e-6,
int n_cores = 1) {
Eigen::setNbThreads(n_cores);
if (n_cores > 1) {
Eigen::setNbThreads(n_cores);
}
Eigen::JacobiSVD<Eigen::MatrixXd> svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV);
Eigen::MatrixXd singularValues = svd.singularValues();
Eigen::MatrixXd singularValuesInv(A.cols(), A.rows());
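For reference, the Moore-Penrose pseudoinverse that this file computes via Eigen's Jacobi SVD can be sketched in base R as follows; the tolerance handling here is illustrative and may differ slightly from the C++ version.

pseudo_inverse <- function(A, tol = 1e-6) {
  s <- svd(A)
  d_inv <- ifelse(s$d > tol * max(s$d), 1 / s$d, 0)  # invert singular values above the tolerance
  s$v %*% diag(d_inv, nrow = length(d_inv)) %*% t(s$u)
}
A <- matrix(c(1, 2, 2, 4), 2, 2)  # rank-deficient
all.equal(pseudo_inverse(A), pseudo_inverse(A) %*% A %*% pseudo_inverse(A))  # Moore-Penrose identity holds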
