From 75c7d9305d0682471cdad5d65656ad77902789cc Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Wed, 9 Oct 2024 21:29:06 -0400 Subject: [PATCH] minor speedups all around --- NAMESPACE | 2 ++ R/RcppExports.R | 4 ++-- R/marge2.R | 4 ++-- R/score_fun_gee.R | 18 +++++++++--------- R/score_fun_glm.R | 11 ++++++----- R/stat_out_score_gee_null.R | 25 +++++++++++++------------ R/stat_out_score_glm_null.R | 9 +++++---- src/Makevars | 3 +++ src/Makevars.win | 3 +++ src/eigenMapInvert.cpp | 2 +- src/eigenMapMatMult.cpp | 2 +- 11 files changed, 47 insertions(+), 36 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3fb7de2..866bca0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -39,7 +39,9 @@ importFrom(MASS,theta.mm) importFrom(Matrix,Matrix) importFrom(Matrix,bdiag) importFrom(Matrix,colSums) +importFrom(Matrix,diag) importFrom(Matrix,rowMeans) +importFrom(Matrix,solve) importFrom(Matrix,t) importFrom(Rcpp,sourceCpp) importFrom(bigstatsr,as_FBM) diff --git a/R/RcppExports.R b/R/RcppExports.R index 2cdc192..55561f2 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,11 +1,11 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -eigenMapMatrixInvert <- function(A, n_cores) { +eigenMapMatrixInvert <- function(A, n_cores = 1L) { .Call(`_scLANE_eigenMapMatrixInvert`, A, n_cores) } -eigenMapMatMult <- function(A, B, n_cores) { +eigenMapMatMult <- function(A, B, n_cores = 1L) { .Call(`_scLANE_eigenMapMatMult`, A, B, n_cores) } diff --git a/R/marge2.R b/R/marge2.R index 10848d6..0b5b36a 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -82,7 +82,7 @@ marge2 <- function(X_pred = NULL, q <- ncol(X_pred) # Number of predictor variables B <- as.matrix(rep(1, NN)) # Start with the intercept model. # estimate "known" theta for GEE models - if (is.gee) { + if (is.gee || !is.glmm) { theta_hat <- MASS::theta.mm(y = Y, mu = mean(Y), dfr = NN - 1) @@ -840,7 +840,7 @@ marge2 <- function(X_pred = NULL, data = model_df, method = "glm.fit2", link = log, - init.theta = 1, + init.theta = theta_hat, y = FALSE, model = FALSE) } diff --git a/R/score_fun_gee.R b/R/score_fun_gee.R index d26a773..a6e1712 100644 --- a/R/score_fun_gee.R +++ b/R/score_fun_gee.R @@ -5,6 +5,7 @@ #' @author David I. Warton #' @author Jack Leary #' @importFrom stats lm.fit +#' @importFrom Matrix diag t solve #' @importFrom MASS ginv #' @description Calculate the score statistic for a GEE model. #' @param Y The response variable. Defaults to NULL. @@ -51,22 +52,21 @@ score_fun_gee <- function(Y = NULL, B.est <- matrix(0, nrow = p, ncol = 1) Sigma22 <- matrix(0, nrow = p, ncol = p) J21 <- Sigma21 <- matrix(0, nrow = p, ncol = p1) - for (i in seq(N)) { k <- sum(n_vec[seq(i)]) VS.est_i <- VS.est_list[[i]] AWA.est_i <- AWA.est_list[[i]] J2_i <- J2_list[[i]] Sigma2_i <- Sigma2_list[[i]] - D.est_i <- eigenMapMatMult(A = diag((mu.est[(sum(n_vec1[seq(i)]) + 1):k]), nrow = n_vec[i], ncol = n_vec[i]), + D.est_i <- eigenMapMatMult(A = Matrix::diag((mu.est[(sum(n_vec1[seq(i)]) + 1):k]), nrow = n_vec[i], ncol = n_vec[i]), B = XA[(sum(n_vec1[seq(i)]) + 1):k, ], n_cores = 1) - D_est_i_transpose <- t(D.est_i) + D_est_i_transpose <- Matrix::t(D.est_i) J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose, - B = t(J2_i), + B = Matrix::t(J2_i), n_cores = 1) Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose, - B = t(Sigma2_i), + B = Matrix::t(Sigma2_i), n_cores = 1) B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose, B = VS.est_i, @@ -82,12 +82,12 @@ score_fun_gee <- function(Y = NULL, B = J11.inv, n_cores = 1) temp_prod_1 <- eigenMapMatMult(A = temp_prod_1, - B = t(Sigma21), + B = Matrix::t(Sigma21), n_cores = 1) temp_prod_2 <- eigenMapMatMult(A = Sigma21, B = J11.inv, n_cores = 1) - J21_transpose <- t(J21) + J21_transpose <- Matrix::t(J21) temp_prod_2 <- eigenMapMatMult(A = temp_prod_2, B = J21_transpose, n_cores = 1) @@ -98,11 +98,11 @@ score_fun_gee <- function(Y = NULL, B = J21_transpose, n_cores = 1) Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3 - Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1) }, silent = TRUE) + Sigma_inv <- try({ Matrix::solve(Sigma) }, silent = TRUE) if (inherits(Sigma_inv, "try-error")) { Sigma_inv <- MASS::ginv(Sigma) } - temp_prod <- eigenMapMatMult(A = t(B.est), + temp_prod <- eigenMapMatMult(A = Matrix::t(B.est), B = Sigma_inv, n_cores = 1) score <- eigenMapMatMult(A = temp_prod, diff --git a/R/score_fun_glm.R b/R/score_fun_glm.R index 26a5b39..6521b2b 100644 --- a/R/score_fun_glm.R +++ b/R/score_fun_glm.R @@ -5,6 +5,7 @@ #' @author David I. Warton #' @author Jack Leary #' @importFrom stats lm.fit +#' @importFrom Matrix t #' @importFrom MASS ginv #' @description Calculate the score statistic for a GLM model. #' @param Y The response variable. Defaults to NULL. @@ -40,20 +41,20 @@ score_fun_glm <- function(Y = NULL, B_list_i <- eigenMapMatMult(A = B1_list_i, B = XA, n_cores = 1) - D_list_i <- eigenMapMatMult(A = t(XA), + D_list_i <- eigenMapMatMult(A = Matrix::t(XA), B = (XA * c(mu.est^2 / V.est)), n_cores = 1) - temp_prod <- eigenMapMatMult(A = t(B_list_i), + temp_prod <- eigenMapMatMult(A = Matrix::t(B_list_i), B = A_list_i, n_cores = 1) temp_prod <- eigenMapMatMult(A = temp_prod, B = B_list_i, n_cores = 1) inv.XVX_22 <- D_list_i - temp_prod - B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), + B.est <- eigenMapMatMult(A = Matrix::t(mu.est * VS.est_i), B = XA, n_cores = 1) - XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1) }, silent = TRUE) + XVX_22 <- try({ solve(inv.XVX_22) }, silent = TRUE) if (inherits(XVX_22, "try-error")) { XVX_22 <- MASS::ginv(inv.XVX_22) } @@ -61,7 +62,7 @@ score_fun_glm <- function(Y = NULL, B = XVX_22, n_cores = 1) score <- eigenMapMatMult(A = temp_prod, - B = t(B.est), + B = Matrix::t(B.est), n_cores = 1) } res <- list(score = score) diff --git a/R/stat_out_score_gee_null.R b/R/stat_out_score_gee_null.R index 574c747..084eb7a 100644 --- a/R/stat_out_score_gee_null.R +++ b/R/stat_out_score_gee_null.R @@ -7,6 +7,7 @@ #' @importFrom geeM geem #' @importFrom MASS negative.binomial ginv #' @importFrom stats fitted.values +#' @importFrom Matrix diag t solve #' @description A function that calculates parts of the score statistic for GEEs only (it is used for the full path for forward selection). #' @param Y The response variable Defaults to NULL. #' @param B_null The design matrix matrix under the null model Defaults to NULL. @@ -50,10 +51,10 @@ stat_out_score_gee_null <- function(Y = NULL, k <- sum(n_vec[seq(i)]) # set up working correlation matrix structure if (cor.structure == "independence") { - R_alpha <- diag(1, n_vec[i], n_vec[i]) + R_alpha <- Matrix::diag(1, n_vec[i], n_vec[i]) } else if (cor.structure == "exchangeable") { R_alpha <- matrix(alpha_est, nrow = n_vec[i], ncol = n_vec[i]) - R_alpha <- R_alpha + diag(c(1 - alpha_est), nrow = n_vec[i], ncol = n_vec[i]) + R_alpha <- R_alpha + Matrix::diag(c(1 - alpha_est), nrow = n_vec[i], ncol = n_vec[i]) } else if (cor.structure == "ar1") { R_alpha <- alpha_est^outer(seq(n_vec[i]), seq(n_vec[i]), \(x, y) abs(x - y)) } else { @@ -62,9 +63,9 @@ stat_out_score_gee_null <- function(Y = NULL, # compute iteration values for each statistic temp_seq_n <- (sum(n_vec1[seq(i)]) + 1):k mu_est_sub <- mu_est[temp_seq_n] - diag_sqrt_V_est <- diag(sqrt(V_est[temp_seq_n]), - nrow = n_vec[i], - ncol = n_vec[i]) + diag_sqrt_V_est <- Matrix::diag(sqrt(V_est[temp_seq_n]), + nrow = n_vec[i], + ncol = n_vec[i]) temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est, B = R_alpha, n_cores = 1) @@ -72,9 +73,9 @@ stat_out_score_gee_null <- function(Y = NULL, B = diag_sqrt_V_est, n_cores = 1) V_est_i_inv <- eigenMapMatrixInvert(A = V_est_i, n_cores = 1) - S_est_i <- t(Y)[temp_seq_n] - mu_est_sub + S_est_i <- Matrix::t(Y)[temp_seq_n] - mu_est_sub temp_prod <- eigenMapMatMult(A = S_est_i, - B = t(S_est_i), + B = Matrix::t(S_est_i), n_cores = 1) temp_prod <- eigenMapMatMult(A = V_est_i_inv, B = temp_prod, @@ -82,12 +83,12 @@ stat_out_score_gee_null <- function(Y = NULL, AWA_est_i <- eigenMapMatMult(A = temp_prod, B = V_est_i_inv, n_cores = 1) - D_est_i <- eigenMapMatMult(A = diag(mu_est_sub, - nrow = n_vec[i], - ncol = n_vec[i]), + D_est_i <- eigenMapMatMult(A = Matrix::diag(mu_est_sub, + nrow = n_vec[i], + ncol = n_vec[i]), B = B_null[temp_seq_n, ], n_cores = 1) - D_est_i_transpose <- t(D_est_i) + D_est_i_transpose <- Matrix::t(D_est_i) temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv, n_cores = 1) @@ -116,7 +117,7 @@ stat_out_score_gee_null <- function(Y = NULL, if (nrow(J11) == 1 && ncol(J11) == 1) { J11_inv <- 1 / J11 } else { - J11_inv <- try({ eigenMapMatrixInvert(J11, n_cores = 1) }, silent = TRUE) + J11_inv <- try({ Matrix::solve(J11) }, silent = TRUE) if (inherits(J11_inv, "try-error")) { J11_inv <- MASS::ginv(J11) } diff --git a/R/stat_out_score_glm_null.R b/R/stat_out_score_glm_null.R index c3836f3..07abdf8 100644 --- a/R/stat_out_score_glm_null.R +++ b/R/stat_out_score_glm_null.R @@ -6,6 +6,7 @@ #' @author Jack Leary #' @importFrom gamlss gamlss #' @importFrom stats fitted.values +#' @importFrom Matrix diag t #' @importFrom MASS ginv #' @description A function that calculates parts of the score statistic for GLMs only (it is used for the full path for forward selection). #' @param Y : The response variable Defaults to NULL. @@ -28,8 +29,8 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) { mu_est <- as.matrix(stats::fitted.values(ests)) V_est <- mu_est * (1 + mu_est * sigma_est) # Type I NB variance = mu (1 + mu * sigma); sigma = 1 / theta VS_est_list <- (Y - mu_est) / V_est - mu_V_diag <- diag(c(mu_est^2 / V_est)) - temp_prod <- eigenMapMatMult(A = t(B_null), + mu_V_diag <- Matrix::diag(c(mu_est^2 / V_est)) + temp_prod <- eigenMapMatMult(A = Matrix::t(B_null), B = mu_V_diag, n_cores = 1) A_list_inv <- eigenMapMatMult(A = temp_prod, @@ -38,12 +39,12 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) { if (ncol(A_list_inv) == 1 && nrow(A_list_inv) == 1) { A_list <- 1 / A_list_inv } else { - A_list <- try({ eigenMapMatrixInvert(A_list_inv, n_cores = 1) }, silent = TRUE) + A_list <- try({ solve(A_list_inv) }, silent = TRUE) if (inherits(A_list, "try-error")) { A_list <- MASS::ginv(A_list_inv) } } - B1_list <- eigenMapMatMult(A = t(B_null), + B1_list <- eigenMapMatMult(A = Matrix::t(B_null), B = mu_V_diag, n_cores = 1) res <- list(VS.est_list = VS_est_list, diff --git a/src/Makevars b/src/Makevars index 3245729..937570c 100644 --- a/src/Makevars +++ b/src/Makevars @@ -5,3 +5,6 @@ ## With R 3.1.0 or later, you can uncomment the following line to tell R to ## enable compilation with C++11 (or even C++14) where available CXX_STD = CXX11 + +## enable CPU speedups +CXXFLAGS += -O3 -march=native diff --git a/src/Makevars.win b/src/Makevars.win index 3245729..937570c 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -5,3 +5,6 @@ ## With R 3.1.0 or later, you can uncomment the following line to tell R to ## enable compilation with C++11 (or even C++14) where available CXX_STD = CXX11 + +## enable CPU speedups +CXXFLAGS += -O3 -march=native diff --git a/src/eigenMapInvert.cpp b/src/eigenMapInvert.cpp index 901ab05..e09b374 100644 --- a/src/eigenMapInvert.cpp +++ b/src/eigenMapInvert.cpp @@ -3,7 +3,7 @@ // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::export]] -SEXP eigenMapMatrixInvert(const Eigen::Map A, int n_cores){ +SEXP eigenMapMatrixInvert(const Eigen::Map A, int n_cores = 1){ Eigen::setNbThreads(n_cores); Eigen::MatrixXd B = A.llt().solve(Eigen::MatrixXd::Identity(A.rows(), A.cols())); return Rcpp::wrap(B); diff --git a/src/eigenMapMatMult.cpp b/src/eigenMapMatMult.cpp index 2a8f3e6..7a5c368 100644 --- a/src/eigenMapMatMult.cpp +++ b/src/eigenMapMatMult.cpp @@ -5,7 +5,7 @@ SEXP eigenMapMatMult(const Eigen::Map A, const Eigen::Map B, - int n_cores){ + int n_cores = 1) { Eigen::setNbThreads(n_cores); Eigen::MatrixXd C; C.noalias() = A * B;