From b3958d62265a98f8f60017dbab5ff14a99d7bac0 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Tue, 22 Oct 2024 13:36:27 -0400 Subject: [PATCH 1/4] cleaned up several of the scoring fns and improved c++ fns --- NAMESPACE | 3 -- R/marge2.R | 9 ++--- R/score_fun_gee.R | 60 +++++++++---------------------- R/score_fun_glm.R | 32 +++++------------ R/stat_out_score_gee_null.R | 66 ++++++++++------------------------- R/stat_out_score_glm_null.R | 16 +++------ man/marge2.Rd | 2 +- src/eigenMapInvert.cpp | 6 ++-- src/eigenMapMatMult.cpp | 4 ++- src/eigenMapPseudoInverse.cpp | 8 +++-- 10 files changed, 64 insertions(+), 142 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 915f26b..91f24bd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -55,19 +55,16 @@ importFrom(dplyr,contains) importFrom(dplyr,desc) importFrom(dplyr,distinct) importFrom(dplyr,filter) -importFrom(dplyr,group_by) importFrom(dplyr,if_else) importFrom(dplyr,inner_join) importFrom(dplyr,lag) importFrom(dplyr,lead) importFrom(dplyr,mutate) -importFrom(dplyr,ntile) importFrom(dplyr,pull) importFrom(dplyr,relocate) importFrom(dplyr,rename) importFrom(dplyr,row_number) importFrom(dplyr,rowwise) -importFrom(dplyr,sample_frac) importFrom(dplyr,select) importFrom(dplyr,slice_head) importFrom(dplyr,summarise) diff --git a/R/marge2.R b/R/marge2.R index 4e24a08..9cd95a9 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -8,7 +8,7 @@ #' @description MARS fitting function for negative binomial generalized linear models (GLMs) & generalized estimating equations (GEEs). #' @import glm2 #' @import magrittr -#' @importFrom dplyr mutate ntile group_by sample_frac +#' @importFrom dplyr mutate #' @importFrom geeM geem #' @importFrom MASS glm.nb negative.binomial theta.mm #' @importFrom utils tail @@ -24,7 +24,7 @@ #' @param cor.structure If \code{is.gee = TRUE}, a string specifying the desired correlation structure for the NB GEE. Defaults to "ar1". #' @param sandwich.var (Optional) Should the sandwich variance estimator be used instead of the model-based estimator? Default to FALSE. #' @param approx.knot (Optional) Should the set of candidate knots be subsampled in order to speed up computation? This has little effect on the final fit, but can improve computation time somewhat. Defaults to TRUE. -#' @param n.knot.max (Optional) The maximum number of candidate knots to consider. Uses random sampling (don't worry, a random seed is set internally) to select this number of unique values from the reduced set of all candidate knots. Defaults to 50. +#' @param n.knot.max (Optional) The maximum number of candidate knots to consider. Uses uniform sampling to select this number of unique values from the reduced set of all candidate knots. Defaults to 50. #' @param glm.backend (Optional) Character specifying which GLM-fitting backend should be used. Must be one of "MASS" or "speedglm". Defaults to "MASS". #' @param tols_score (Optional) The set tolerance for monitoring the convergence for the difference in score statistics between the parent and candidate model (this is the lack-of-fit criterion used for MARGE). Defaults to 0.00001. #' @param minspan (Optional) A set minimum span value. Defaults to NULL. @@ -180,9 +180,7 @@ marge2 <- function(X_pred = NULL, score_knot_both_add_mat <- NULL score_knot_one_int_mat <- NULL score_knot_one_add_mat <- NULL - int.count1 <- 0 - in.set <- ifelse(ncol(B) > 1, sum(!var_name_vec %in% var_name), 0) for (t in seq_along(X_red)) { @@ -825,8 +823,7 @@ marge2 <- function(X_pred = NULL, } if (!is.glmm) { if (!is.null(Y.offset)) { - model_df <- dplyr::mutate(model_df, - cell_offset = Y.offset) + model_df <- dplyr::mutate(model_df, ßcell_offset = Y.offset) model_formula <- paste0(model_formula, " + ", "offset(log(1 / cell_offset))") # this is correct i promise } model_formula <- stats::as.formula(model_formula) diff --git a/R/score_fun_gee.R b/R/score_fun_gee.R index adde7df..cbef03a 100644 --- a/R/score_fun_gee.R +++ b/R/score_fun_gee.R @@ -57,56 +57,28 @@ score_fun_gee <- function(Y = NULL, AWA.est_i <- AWA.est_list[[i]] J2_i <- J2_list[[i]] Sigma2_i <- Sigma2_list[[i]] - D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]), - B = XA[(sum(n_vec1[seq(i)]) + 1):k, ], - n_cores = 1) + D.est_i <- eigenMapMatMult(A = diag(mu.est[(sum(n_vec1[seq(i)]) + 1):k], nrow = n_vec[i], ncol = n_vec[i]), B = XA[(sum(n_vec1[seq(i)]) + 1):k, ]) D_est_i_transpose <- t(D.est_i) - J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose, - B = t(J2_i), - n_cores = 1) - Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose, - B = t(Sigma2_i), - n_cores = 1) - B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose, - B = VS.est_i, - n_cores = 1) - temp_prod <- eigenMapMatMult(A = D_est_i_transpose, - B = AWA.est_i, - n_cores = 1) - Sigma22 <- Sigma22 + eigenMapMatMult(A = temp_prod, - B = D.est_i, - n_cores = 1) + J21 <- J21 + eigenMapMatMult(A = D_est_i_transpose, B = t(J2_i)) + Sigma21 <- Sigma21 + eigenMapMatMult(A = D_est_i_transpose, B = t(Sigma2_i)) + B.est <- B.est + eigenMapMatMult(A = D_est_i_transpose, B = VS.est_i) + temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = AWA.est_i) + Sigma22 <- Sigma22 + eigenMapMatMult(A = temp_prod, B = D.est_i) } - temp_prod_1 <- eigenMapMatMult(A = J21, - B = J11.inv, - n_cores = 1) - temp_prod_1 <- eigenMapMatMult(A = temp_prod_1, - B = t(Sigma21), - n_cores = 1) - temp_prod_2 <- eigenMapMatMult(A = Sigma21, - B = J11.inv, - n_cores = 1) + temp_prod_1 <- eigenMapMatMult(A = J21, B = J11.inv) + temp_prod_1 <- eigenMapMatMult(A = temp_prod_1, B = t(Sigma21)) + temp_prod_2 <- eigenMapMatMult(A = Sigma21, B = J11.inv) J21_transpose <- t(J21) - temp_prod_2 <- eigenMapMatMult(A = temp_prod_2, - B = J21_transpose, - n_cores = 1) - temp_prod_3 <- eigenMapMatMult(A = J21, - B = JSigma11, - n_cores = 1) - temp_prod_3 <- eigenMapMatMult(A = temp_prod_3, - B = J21_transpose, - n_cores = 1) + temp_prod_2 <- eigenMapMatMult(A = temp_prod_2, B = J21_transpose) + temp_prod_3 <- eigenMapMatMult(A = J21, B = JSigma11) + temp_prod_3 <- eigenMapMatMult(A = temp_prod_3, B = J21_transpose) Sigma <- Sigma22 - temp_prod_1 - temp_prod_2 + temp_prod_3 - Sigma_inv <- try({ eigenMapMatrixInvert(Sigma, n_cores = 1L) }, silent = TRUE) + Sigma_inv <- try({ eigenMapMatrixInvert(Sigma) }, silent = TRUE) if (inherits(Sigma_inv, "try-error")) { - Sigma_inv <- eigenMapPseudoInverse(Sigma, n_cores = 1L) + Sigma_inv <- eigenMapPseudoInverse(Sigma) } - temp_prod <- eigenMapMatMult(A = t(B.est), - B = Sigma_inv, - n_cores = 1) - score <- eigenMapMatMult(A = temp_prod, - B = B.est, - n_cores = 1) + temp_prod <- eigenMapMatMult(A = t(B.est), B = Sigma_inv) + score <- eigenMapMatMult(A = temp_prod, B = B.est) } res <- list(score = score) return(res) diff --git a/R/score_fun_glm.R b/R/score_fun_glm.R index 36b6d1f..8aa00f9 100644 --- a/R/score_fun_glm.R +++ b/R/score_fun_glm.R @@ -36,32 +36,18 @@ score_fun_glm <- function(Y = NULL, VS.est_i <- unlist(VS.est_list) A_list_i <- unlist(A_list) B1_list_i <- unlist(B1_list) - B_list_i <- eigenMapMatMult(A = B1_list_i, - B = XA, - n_cores = 1) - D_list_i <- eigenMapMatMult(A = t(XA), - B = (XA * c(mu.est^2 / V.est)), - n_cores = 1) - temp_prod <- eigenMapMatMult(A = t(B_list_i), - B = A_list_i, - n_cores = 1) - temp_prod <- eigenMapMatMult(A = temp_prod, - B = B_list_i, - n_cores = 1) + B_list_i <- eigenMapMatMult(A = B1_list_i, B = XA) + D_list_i <- eigenMapMatMult(A = t(XA), B = (XA * c(mu.est^2 / V.est))) + temp_prod <- eigenMapMatMult(A = t(B_list_i), B = A_list_i) + temp_prod <- eigenMapMatMult(A = temp_prod, B = B_list_i) inv.XVX_22 <- D_list_i - temp_prod - B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), - B = XA, - n_cores = 1) - XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22, n_cores = 1L) }, silent = TRUE) + B.est <- eigenMapMatMult(A = t(mu.est * VS.est_i), B = XA) + XVX_22 <- try({ eigenMapMatrixInvert(inv.XVX_22) }, silent = TRUE) if (inherits(XVX_22, "try-error")) { - XVX_22 <- eigenMapPseudoInverse(inv.XVX_22, n_cores = 1L) + XVX_22 <- eigenMapPseudoInverse(inv.XVX_22) } - temp_prod <- eigenMapMatMult(A = B.est, - B = XVX_22, - n_cores = 1) - score <- eigenMapMatMult(A = temp_prod, - B = t(B.est), - n_cores = 1) + temp_prod <- eigenMapMatMult(A = B.est, B = XVX_22) + score <- eigenMapMatMult(A = temp_prod, B = t(B.est)) } res <- list(score = score) return(res) diff --git a/R/stat_out_score_gee_null.R b/R/stat_out_score_gee_null.R index 4fea157..667bb20 100644 --- a/R/stat_out_score_gee_null.R +++ b/R/stat_out_score_gee_null.R @@ -66,52 +66,26 @@ stat_out_score_gee_null <- function(Y = NULL, diag_sqrt_V_est <- diag(sqrt(V_est[temp_seq_n]), nrow = n_vec[i], ncol = n_vec[i]) - temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est, - B = R_alpha, - n_cores = 1) - V_est_i <- eigenMapMatMult(A = temp_prod, - B = diag_sqrt_V_est, - n_cores = 1) - V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i, n_cores = 1L) }, silent = TRUE) + temp_prod <- eigenMapMatMult(A = diag_sqrt_V_est, B = R_alpha) + V_est_i <- eigenMapMatMult(A = temp_prod, B = diag_sqrt_V_est) + V_est_i_inv <- try({ eigenMapMatrixInvert(V_est_i) }, silent = TRUE) if (inherits(V_est_i_inv, "try-error")) { - V_est_i_inv <- eigenMapPseudoInverse(V_est_i, n_cores = 1L) + V_est_i_inv <- eigenMapPseudoInverse(V_est_i) } S_est_i <- t(Y)[temp_seq_n] - mu_est_sub - temp_prod <- eigenMapMatMult(A = S_est_i, - B = t(S_est_i), - n_cores = 1) - temp_prod <- eigenMapMatMult(A = V_est_i_inv, - B = temp_prod, - n_cores = 1) - AWA_est_i <- eigenMapMatMult(A = temp_prod, - B = V_est_i_inv, - n_cores = 1) - D_est_i <- eigenMapMatMult(A = diag(mu_est_sub, - nrow = n_vec[i], - ncol = n_vec[i]), - B = B_null[temp_seq_n, ], - n_cores = 1) + temp_prod <- eigenMapMatMult(A = S_est_i, B = t(S_est_i)) + temp_prod <- eigenMapMatMult(A = V_est_i_inv, B = temp_prod) + AWA_est_i <- eigenMapMatMult(A = temp_prod, B = V_est_i_inv) + D_est_i <- eigenMapMatMult(A = diag(mu_est_sub, nrow = n_vec[i], ncol = n_vec[i]), B = B_null[temp_seq_n, ]) D_est_i_transpose <- t(D_est_i) - temp_prod <- eigenMapMatMult(A = D_est_i_transpose, - B = V_est_i_inv, - n_cores = 1) - J1_i <- eigenMapMatMult(A = temp_prod, - B = D_est_i, - n_cores = 1) + temp_prod <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv) + J1_i <- eigenMapMatMult(A = temp_prod, B = D_est_i) J11 <- J11 + J1_i - J2_i <- eigenMapMatMult(A = D_est_i_transpose, - B = V_est_i_inv, - n_cores = 1) - Sigma2_i <- eigenMapMatMult(A = D_est_i_transpose, - B = AWA_est_i, - n_cores = 1) - Sigma1_i <- eigenMapMatMult(A = Sigma2_i, - B = D_est_i, - n_cores = 1) + J2_i <- eigenMapMatMult(A = D_est_i_transpose, B = V_est_i_inv) + Sigma2_i <- eigenMapMatMult(A = D_est_i_transpose, B = AWA_est_i) + Sigma1_i <- eigenMapMatMult(A = Sigma2_i, B = D_est_i) Sigma11 <- Sigma11 + Sigma1_i - V_est_list_elem_2 <- eigenMapMatMult(A = V_est_i_inv, - B = S_est_i, - n_cores = 1) + V_est_list_elem_2 <- eigenMapMatMult(A = V_est_i_inv, B = S_est_i) VS_est_list[[i]] <- V_est_list_elem_2 AWA_est_list[[i]] <- AWA_est_i J2_list[[i]] <- J2_i @@ -120,17 +94,13 @@ stat_out_score_gee_null <- function(Y = NULL, if (nrow(J11) == 1 && ncol(J11) == 1) { J11_inv <- 1 / J11 } else { - J11_inv <- try({ eigenMapMatrixInvert(J11, n_cores = 1L) }, silent = TRUE) + J11_inv <- try({ eigenMapMatrixInvert(J11) }, silent = TRUE) if (inherits(J11_inv, "try-error")) { - J11_inv <- eigenMapPseudoInverse(J11, n_cores = 1L) + J11_inv <- eigenMapPseudoInverse(J11) } } - temp_prod <- eigenMapMatMult(A = J11_inv, - B = Sigma11, - n_cores = 1) - JSigma11 <- eigenMapMatMult(A = temp_prod, - B = J11_inv, - n_cores = 1) + temp_prod <- eigenMapMatMult(A = J11_inv, B = Sigma11) + JSigma11 <- eigenMapMatMult(A = temp_prod, B = J11_inv) res <- list(VS.est_list = VS_est_list, AWA.est_list = AWA_est_list, J2_list = J2_list, diff --git a/R/stat_out_score_glm_null.R b/R/stat_out_score_glm_null.R index 0d30dc5..4f017f5 100644 --- a/R/stat_out_score_glm_null.R +++ b/R/stat_out_score_glm_null.R @@ -28,23 +28,17 @@ stat_out_score_glm_null <- function(Y = NULL, B_null = NULL) { V_est <- mu_est * (1 + mu_est * sigma_est) # Type I NB variance = mu (1 + mu * sigma); sigma = 1 / theta VS_est_list <- (Y - mu_est) / V_est mu_V_diag <- diag(c(mu_est^2 / V_est)) - temp_prod <- eigenMapMatMult(A = t(B_null), - B = mu_V_diag, - n_cores = 1) - A_list_inv <- eigenMapMatMult(A = temp_prod, - B = B_null, - n_cores = 1) + temp_prod <- eigenMapMatMult(A = t(B_null), B = mu_V_diag) + A_list_inv <- eigenMapMatMult(A = temp_prod, B = B_null) if (ncol(A_list_inv) == 1 && nrow(A_list_inv) == 1) { A_list <- 1 / A_list_inv } else { - A_list <- try({ eigenMapMatrixInvert(A_list_inv, n_cores = 1L) }, silent = TRUE) + A_list <- try({ eigenMapMatrixInvert(A_list_inv) }, silent = TRUE) if (inherits(A_list, "try-error")) { - A_list <- eigenMapPseudoInverse(A_list_inv, n_cores = 1L) + A_list <- eigenMapPseudoInverse(A_list_inv) } } - B1_list <- eigenMapMatMult(A = t(B_null), - B = mu_V_diag, - n_cores = 1) + B1_list <- eigenMapMatMult(A = t(B_null), B = mu_V_diag) res <- list(VS.est_list = VS_est_list, A_list = A_list, B1_list = B1_list, diff --git a/man/marge2.Rd b/man/marge2.Rd index 962edc1..847124a 100644 --- a/man/marge2.Rd +++ b/man/marge2.Rd @@ -45,7 +45,7 @@ marge2( \item{approx.knot}{(Optional) Should the set of candidate knots be subsampled in order to speed up computation? This has little effect on the final fit, but can improve computation time somewhat. Defaults to TRUE.} -\item{n.knot.max}{(Optional) The maximum number of candidate knots to consider. Uses random sampling (don't worry, a random seed is set internally) to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.} +\item{n.knot.max}{(Optional) The maximum number of candidate knots to consider. Uses uniform sampling to select this number of unique values from the reduced set of all candidate knots. Defaults to 50.} \item{glm.backend}{(Optional) Character specifying which GLM-fitting backend should be used. Must be one of "MASS" or "speedglm". Defaults to "MASS".} diff --git a/src/eigenMapInvert.cpp b/src/eigenMapInvert.cpp index 6eef2f4..a8d45e3 100644 --- a/src/eigenMapInvert.cpp +++ b/src/eigenMapInvert.cpp @@ -3,8 +3,10 @@ // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::export]] -Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map A, int n_cores = 1){ - Eigen::setNbThreads(n_cores); +Eigen::MatrixXd eigenMapMatrixInvert(const Eigen::Map A, int n_cores = 1) { + if (n_cores > 1) { + Eigen::setNbThreads(n_cores); + } Eigen::MatrixXd B = A.llt().solve(Eigen::MatrixXd::Identity(A.rows(), A.cols())); return B; } diff --git a/src/eigenMapMatMult.cpp b/src/eigenMapMatMult.cpp index cf697ef..b760b57 100644 --- a/src/eigenMapMatMult.cpp +++ b/src/eigenMapMatMult.cpp @@ -6,7 +6,9 @@ Eigen::MatrixXd eigenMapMatMult(const Eigen::Map A, const Eigen::Map B, int n_cores = 1) { - Eigen::setNbThreads(n_cores); + if (n_cores > 1) { + Eigen::setNbThreads(n_cores); + } Eigen::MatrixXd C; C.noalias() = A * B; return C; diff --git a/src/eigenMapPseudoInverse.cpp b/src/eigenMapPseudoInverse.cpp index 88758ef..c0f369b 100644 --- a/src/eigenMapPseudoInverse.cpp +++ b/src/eigenMapPseudoInverse.cpp @@ -3,10 +3,12 @@ // [[Rcpp::depends(RcppEigen)]] // [[Rcpp::export]] -Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map A, - double tolerance = 1e-6, +Eigen::MatrixXd eigenMapPseudoInverse(const Eigen::Map A, + double tolerance = 1e-6, int n_cores = 1) { - Eigen::setNbThreads(n_cores); + if (n_cores > 1) { + Eigen::setNbThreads(n_cores); + } Eigen::JacobiSVD svd(A, Eigen::ComputeThinU | Eigen::ComputeThinV); Eigen::MatrixXd singularValues = svd.singularValues(); Eigen::MatrixXd singularValuesInv(A.cols(), A.rows()); From 5631ca4212bb4e94fbf1552cf8ce3b0df7a31810 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Mon, 21 Oct 2024 13:25:17 -0400 Subject: [PATCH 2/4] minor doc changes --- NEWS.md | 1 + R/fitGLMM.R | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6ea9d0d..fadef48 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ + Minor bug fixes. + More improved matrix operations using `RcppEigen`. + Improved support for `cell_data_set` objects from `monocle3`. ++ Sped up GLMM mode. # Changes in v0.8.4 diff --git a/R/fitGLMM.R b/R/fitGLMM.R index 405327f..690e484 100644 --- a/R/fitGLMM.R +++ b/R/fitGLMM.R @@ -2,7 +2,7 @@ #' #' @name fitGLMM #' @author Jack R. Leary -#' @description Fits a negative binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly space across quantiles. The resulting model can output subject-specific and population-level fitted values. +#' @description Fits a negative-binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly spaced across quantiles. The resulting model can output subject-specific and population-level fitted values. #' @importFrom purrr map map_dbl map_dfr pmap_dfc reduce #' @importFrom dplyr mutate select if_else #' @importFrom mpath glmregNB From 40c2adeade364baf00bfcd356ca0ad8ff98b1ec7 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Tue, 22 Oct 2024 15:32:38 -0400 Subject: [PATCH 3/4] very minor docs update --- man/fitGLMM.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/fitGLMM.Rd b/man/fitGLMM.Rd index aa9f510..da98953 100644 --- a/man/fitGLMM.Rd +++ b/man/fitGLMM.Rd @@ -45,7 +45,7 @@ fitGLMM( An object of class \code{marge} containing the fitted model & other optional quantities of interest (basis function matrix, GCV, etc.). } \description{ -Fits a negative binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly space across quantiles. The resulting model can output subject-specific and population-level fitted values. +Fits a negative-binomial generalized linear mixed model using truncated power basis function splines as input. The basis matrix can be created adaptively using subject-specific estimation of optimal knots using \code{\link{marge2}}, or basis functions can be evenly spaced across quantiles. The resulting model can output subject-specific and population-level fitted values. } \examples{ data(sim_counts) From 621866e7526abf6d1099fec26210e70619d4def8 Mon Sep 17 00:00:00 2001 From: Jack Leary Date: Wed, 23 Oct 2024 14:33:02 -0400 Subject: [PATCH 4/4] fixed bug in marge2() related to duplicate basis fns -- closes #260 --- R/marge2.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/marge2.R b/R/marge2.R index 9cd95a9..f2ff978 100644 --- a/R/marge2.R +++ b/R/marge2.R @@ -809,6 +809,12 @@ marge2 <- function(X_pred = NULL, # Some final model output, WIC, GCV etc. B_final <- as.matrix(B[, colnames(B) %in% cnames_2[[which.min(WIC_vec_2)]]]) model_df <- as.data.frame(B_final) + # remove duplicate hinge functions if present + if (length(unique(colnames(model_df))) != ncol(model_df)) { + unique_cols <- which(!duplicated(as.list(model_df))) + B_final <- B_final[, unique_cols] + model_df <- model_df[, unique_cols] + } if (ncol(model_df) == 1) { colnames(model_df) <- c("Intercept") model_formula <- "Y ~ -1 + Intercept" @@ -823,7 +829,7 @@ marge2 <- function(X_pred = NULL, } if (!is.glmm) { if (!is.null(Y.offset)) { - model_df <- dplyr::mutate(model_df, ßcell_offset = Y.offset) + model_df <- dplyr::mutate(model_df, cell_offset = Y.offset) model_formula <- paste0(model_formula, " + ", "offset(log(1 / cell_offset))") # this is correct i promise } model_formula <- stats::as.formula(model_formula)