From 7236efc74befd0622c5cb3985a236aaea49d868b Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 1 Mar 2023 16:07:36 -0500 Subject: [PATCH 001/103] Copy files for rss version --- R/bayes_reg_mv_ss.R | 282 +++++++++++++++++++ R/elbo_rss.R | 32 +++ R/mr_mash_rss.R | 598 ++++++++++++++++++++++++++++++++++++++++ R/mr_mash_rss_updates.R | 255 +++++++++++++++++ 4 files changed, 1167 insertions(+) create mode 100644 R/bayes_reg_mv_ss.R create mode 100644 R/elbo_rss.R create mode 100644 R/mr_mash_rss.R create mode 100644 R/mr_mash_rss_updates.R diff --git a/R/bayes_reg_mv_ss.R b/R/bayes_reg_mv_ss.R new file mode 100644 index 0000000..3f512b3 --- /dev/null +++ b/R/bayes_reg_mv_ss.R @@ -0,0 +1,282 @@ +# Bayesian multivariate regression with Normal prior +# +# The outputs are: b, the least-squares estimate of the regression +# coefficients; S, the covariance of b; mu1, the posterior mean of the +# regression coefficients; S1, the posterior covariance of the +# regression coefficients; logbf, the log-Bayes factor. +bayes_mvr_ridge <- function (x, Y, V, S0) { + + # Compute the least-squares estimate and its covariance. + b <- drop(x %*% Y)/sum(x^2) + S <- V/sum(x^2) + + # Compute the log-Bayes factor. + # logbf <- mvtnorm::dmvnorm(x=b, sigma=(S+S0), log=T) - mvtnorm::dmvnorm(x=b, sigma=S, log=T) ##Slow + # logbf <- (log(prod(abs(Re(diag(qr(S)$qr))))) + + # - log(prod(abs(Re(diag(qr(S0+S)$qr))))) + # + dot(b,solve(S,b)) - dot(b,solve(S0 + S,b)))/2 ##Not as fast as with determinant() but more stable + logbf <- (as.numeric(determinant(S)$modulus) + + - as.numeric(determinant(S0 + S)$modulus) + + dot(b,solve(S,b)) - dot(b,solve(S0 + S,b)))/2 + + # Compute the posterior mean and covariance assuming a multivariate + # normal prior with zero mean and covariance S0. + # r <- ncol(Y) + # I <- diag(r) + # S1 <- solve(solve(S0) + solve(S)) + # S1 <- S0%*%solve(S+S0)%*%S + #Avoid inverting matrices + SplusS0_chol <- chol(S+S0) + S1 <- S0%*%backsolve(SplusS0_chol, forwardsolve(t(SplusS0_chol), S)) + # mu1 <- solve(S %*% solve(S0) + I,b) + # mu1 <- drop(S1%*%solve(S)%*%b) + #Avoid inverting matrices + S_chol <- chol(S) + mu1 <- drop(S1%*%backsolve(S_chol, forwardsolve(t(S_chol), b))) + + # Return the least-squares estimate and covariance (b, S), the posterior mean and + # covariance (mu1, S1), and the log-Bayes factor (logbf) + return(list(b = b,S = S,mu1 = mu1,S1 = S1,logbf = logbf)) +} + + +# Bayesian multivariate regression with Normal prior with standardized X +# +# The outputs are: mu1, the posterior mean of the +# regression coefficients; logbf, the log-Bayes factor. +bayes_mvr_ridge_standardized_X <- function (b, S0, S, S1, SplusS0_chol, S_chol) { + + # Compute the log-Bayes factor. + logbf <- (chol2ldet(S_chol) - chol2ldet(SplusS0_chol) + + dot(b,backsolve(S_chol, forwardsolve(t(S_chol), b))) - + dot(b,backsolve(SplusS0_chol, forwardsolve(t(SplusS0_chol), b))))/2 + + # Compute the posterior mean assuming a multivariate + # normal prior with zero mean and covariance S0. 
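+  # The two triangular solves below compute solve(S, b) from the Cholesky
+  # factor S_chol (S = crossprod(S_chol)), so this is equivalent to the more
+  # direct form while avoiding an explicit matrix inversion:
+  # mu1 <- drop(S1%*%solve(S)%*%b)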
+ mu1 <- drop(S1%*%backsolve(S_chol, forwardsolve(t(S_chol), b))) + + # Return the posterior mean + # (mu1), and the log-Bayes factor (logbf) + return(list(mu1 = mu1, logbf = logbf)) +} + + +# Bayesian multivariate regression with Normal prior with transformed +# xtilde = x%*%solve(chol(V)) [this calculation is not needed but useful +# to understand the derivation] that allows to precompute some +# quantities +# +# The outputs are: mu1, the posterior mean of the +# regression coefficients; S1, the posterior covariance of the +# regression coefficients; logbf, the log-Bayes factor. +bayes_mvr_ridge_centered_X <- function (V, b, S, S0, xtx, Vinv, V_chol, S_chol, d, QtimesV_chol) { + + # Compute the log-Bayes factor. + SplusS0_chol <- chol(S+S0) + logbf <- (chol2ldet(S_chol) - chol2ldet(SplusS0_chol) + + dot(b,backsolve(S_chol, forwardsolve(t(S_chol), b))) - + dot(b,backsolve(SplusS0_chol, forwardsolve(t(SplusS0_chol), b))))/2 + #logbf <- dmvnorm(b, rep(0, times=length(b)), (S+S0)) - dmvnorm(b, rep(0, times=length(b)), S) + + # Compute the posterior mean assuming a multivariate + # normal prior with zero mean and covariance S0. + dx <- d/(1 + xtx*d) + A <- sqrt(dx)*QtimesV_chol + S1 <- crossprod(A) + mu1 <- drop(crossprod(A, (A %*% (Vinv %*% (xtx*b))))) + # Return the posterior mean and covariance + # (mu1, S1), and the log-Bayes factor (logbf) + return(list(mu1 = mu1, S1=S1, logbf=logbf)) +} + + +# Bayesian multivariate regression with mixture-of-normals prior +# (mixture weights w0 and covariance matrices S0) +# +# The outputs are: the log-Bayes factor (logbf), the posterior assignment probabilities +# (w1), the posterior mean of the coefficients given that all the +# coefficients are not nonzero (mu1), and the posterior covariance of +# the coefficients given that all the coefficients are not zero (S1). +bayes_mvr_mix <- function (x, Y, V, w0, S0, eps) { + + # Get the number of variables (n) and the number of mixture + # components (k). + r <- ncol(Y) + K <- length(S0) + + # Compute the Bayes factors and posterior statistics separately for + # each mixture component. + # out <- vector("list",K) + # for (k in 1:K){ + # out[[k]] <- bayes_mvr_ridge(x,Y,V,S0[[k]]) + # } + bayes_mvr_ridge_lapply <- function(i){ + bayes_mvr_ridge(x, Y, V, S0[[i]]) + } + out <- lapply(1:K, bayes_mvr_ridge_lapply) + + # Compute the posterior assignment probabilities for the latent + # indicator variable. + logbf <- sapply(out,function (x) x$logbf) + w1 <- softmax(logbf + log(w0 + eps)) + + # Compute the posterior mean (mu1_mix) and covariance (S1_mix) of the + # regression coefficients. + A <- matrix(0,r,r) + mu1_mix <- rep(0,r) + for (k in 1:K) { + wk <- w1[k] + muk <- out[[k]]$mu1 + Sk <- out[[k]]$S1 + mu1_mix <- mu1_mix + wk*muk + A <- A + wk*(Sk + tcrossprod(muk)) + } + S1_mix <- A - tcrossprod(mu1_mix) + ##The following code does not work in the univariate case + # muk <- t(sapply(out, function (x) x$mu1)) + # Sk <- lapply(out, function (x) x$S1) + # mu1_mix <- colSums(muk*w1) + # S1_mix <- Reduce("+", lapply(1:K, function(i){w1[i]*(Sk[[i]] + tcrossprod(muk[i, ]))})) - tcrossprod(mu1_mix) + + # Compute the log-Bayes factor for the mixture as a linear combination of the + # individual BFs foreach mixture component. + u <- max(logbf) + logbf_mix <- u + log(sum(w0 * exp(logbf - u))) + + # Return the log-Bayes factor for the mixture (logbf), the posterior assignment probabilities (w1), the + # posterior mean of the coefficients (mu1), and the posterior + # covariance of the coefficients (S1). 
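+  # Note that mu1_mix and S1_mix computed above are the mean and covariance of
+  # the mixture posterior: mu1_mix = sum_k w1[k]*mu1_k and, by the law of total
+  # variance, S1_mix = sum_k w1[k]*(S1_k + tcrossprod(mu1_k)) - tcrossprod(mu1_mix).
+  # The mixture log-Bayes factor, log(sum_k w0[k]*BF_k), is computed with the
+  # log-sum-exp trick for numerical stability.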
+ return(list(logbf = logbf_mix,w1 = w1,mu1 = mu1_mix,S1 = S1_mix)) +} + + +# Bayesian multivariate regression with mixture-of-normals prior +# (mixture weights w0 and covariance matrices S0) with standardized X. +# +# The outputs are: the log-Bayes factor (logbf), the posterior assignment probabilities +# (w1), the posterior mean of the coefficients given that all the +# coefficients are not nonzero (mu1), and the posterior covariance of +# the coefficients given that all the coefficients are not zero (S1). +bayes_mvr_mix_standardized_X <- function (x, Y, w0, S0, S, S1, SplusS0_chol, S_chol, eps) { + + + # Get the number of conditions (r), the number of mixture + # components (K), and the number of samples (n). + r <- ncol(Y) + K <- length(S0) + n <- nrow(Y) + + # Compute the least-squares estimate. + b <- drop(x %*% Y)/(n-1) + + # Compute the Bayes factors and posterior statistics separately for + # each mixture component. + # out <- vector("list",K) + # for (k in 1:K){ + # out[[k]] <- bayes_mvr_ridge_standardized_X(b, S0[[k]], S, S1[[k]], SplusS0_chol[[k]], S_chol) + # } + bayes_mvr_ridge_lapply <- function(i){ + bayes_mvr_ridge_standardized_X(b, S0[[i]], S, S1[[i]], SplusS0_chol[[i]], S_chol) + } + out <- lapply(1:K, bayes_mvr_ridge_lapply) + + # Compute the posterior assignment probabilities for the latent + # indicator variable. + logbf <- sapply(out,function (x) x$logbf) + w1 <- softmax(logbf + log(w0 + eps)) + + # Compute the posterior mean (mu1_mix) and covariance (S1_mix) of the + # regression coefficients. + A <- matrix(0,r,r) + mu1_mix <- rep(0,r) + for (k in 1:K) { + wk <- w1[k] + muk <- out[[k]]$mu1 + Sk <- S1[[k]] + mu1_mix <- mu1_mix + wk*muk + A <- A + wk*(Sk + tcrossprod(muk)) + } + S1_mix <- A - tcrossprod(mu1_mix) + ##The following code does not work in the univariate case + # muk <- t(sapply(out, function (x) x$mu1)) + # Sk <- lapply(out, function (x) x$S1) + # mu1_mix <- colSums(muk*w1) + # S1_mix <- Reduce("+", lapply(1:K, function(i){w1[i]*(Sk[[i]] + tcrossprod(muk[i, ]))})) - tcrossprod(mu1_mix) + + # Compute the log-Bayes factor for the mixture as a linear combination of the + # individual BFs foreach mixture component. + u <- max(logbf) + logbf_mix <- u + log(sum(w0 * exp(logbf - u))) + + # Return the log-Bayes factor for the mixture (logbf), the posterior assignment probabilities (w1), the + # posterior mean of the coefficients (mu1), and the posterior + # covariance of the coefficients (S1). + return(list(logbf = logbf_mix,w1 = w1,mu1 = mu1_mix,S1 = S1_mix)) +} + + +# Bayesian multivariate regression with mixture-of-normals prior +# (mixture weights w0 and covariance matrices S0) and centered X +# +# The outputs are: the log-Bayes factor (logbf), the posterior assignment probabilities +# (w1), the posterior mean of the coefficients given that all the +# coefficients are not nonzero (mu1), and the posterior covariance of +# the coefficients given that all the coefficients are not zero (S1). +bayes_mvr_mix_centered_X <- function (x, Y, V, w0, S0, xtx, Vinv, V_chol, d, QtimesV_chol, eps) { + + # Get the number of variables (n) and the number of mixture + # components (k). + r <- ncol(Y) + K <- length(S0) + + # Compute the least-squares estimate and covariance. + b <- drop(x %*% Y)/xtx + S <- V/xtx + + # Compute quantities needed for bayes_mvr_ridge_centered_X() + S_chol <- V_chol/sqrt(xtx) + + # Compute the Bayes factors and posterior statistics separately for + # each mixture component. 
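+  # Since S = V/xtx, its Cholesky factor is obtained above simply as
+  # V_chol/sqrt(xtx) (chol(V/c) = chol(V)/sqrt(c) for c > 0), so no
+  # per-variable Cholesky decomposition is needed.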
+ # out <- vector("list",K) + # for (k in 1:K){ + # out[[k]] <- bayes_mvr_ridge_centered_X(V, b, S, S0[[k]], xtx, V_chol, d[[k]], QtimesV_chol[[k]]) + # } + bayes_mvr_ridge_lapply <- function(i){ + bayes_mvr_ridge_centered_X(V, b, S, S0[[i]], xtx, Vinv, V_chol, S_chol, d[[i]], QtimesV_chol[[i]]) + } + out <- lapply(1:K, bayes_mvr_ridge_lapply) + + # Compute the posterior assignment probabilities for the latent + # indicator variable. + logbf <- sapply(out,function (x) x$logbf) + w1 <- softmax(logbf + log(w0 + eps)) + + # Compute the posterior mean (mu1_mix) and covariance (S1_mix) of the + # regression coefficients. + A <- matrix(0,r,r) + mu1_mix <- rep(0,r) + for (k in 1:K) { + wk <- w1[k] + muk <- out[[k]]$mu1 + Sk <- out[[k]]$S1 + mu1_mix <- mu1_mix + wk*muk + A <- A + wk*(Sk + tcrossprod(muk)) + } + S1_mix <- A - tcrossprod(mu1_mix) + ##The following code does not work in the univariate case + # muk <- t(sapply(out, function (x) x$mu1)) + # Sk <- lapply(out, function (x) x$S1) + # mu1_mix <- colSums(muk*w1) + # S1_mix <- Reduce("+", lapply(1:K, function(i){w1[i]*(Sk[[i]] + tcrossprod(muk[i, ]))})) - tcrossprod(mu1_mix) + + # Compute the log-Bayes factor for the mixture as a linear combination of the + # individual BFs foreach mixture component. + u <- max(logbf) + logbf_mix <- u + log(sum(w0 * exp(logbf - u))) + + # Return the log-Bayes factor for the mixture (logbf), the posterior assignment probabilities (w1), the + # posterior mean of the coefficients (mu1), and the posterior + # covariance of the coefficients (S1). + return(list(logbf = logbf_mix,w1 = w1,mu1 = mu1_mix,S1 = S1_mix)) +} \ No newline at end of file diff --git a/R/elbo_rss.R b/R/elbo_rss.R new file mode 100644 index 0000000..6d50f46 --- /dev/null +++ b/R/elbo_rss.R @@ -0,0 +1,32 @@ +###Compute ELBO from intermediate components +compute_ELBO_fun <- function(Rbar, Vinv, ldetV, var_part_tr_wERSS, neg_KL, Y_cov, sum_neg_ent_Y_miss){ + n <- nrow(Rbar) + r <- ncol(Rbar) + # tr_wERSS <- tr(Vinv%*%(crossprod(Rbar))) + var_part_tr_wERSS + tr_wERSS <- sum(Vinv*(crossprod(Rbar))) + var_part_tr_wERSS + if(is.null(Y_cov)){ + e2 <- 0 + } else { + e2 <- sum(Vinv*Y_cov) + } + + ELBO <- -log(n)/2 - (n*r)/2*log(2*pi) - n/2 * ldetV - 0.5*(tr_wERSS+e2) + neg_KL - sum_neg_ent_Y_miss + + return(ELBO) +} + +###Compute intermediate components of the ELBO +compute_ELBO_terms <- function(var_part_tr_wERSS, neg_KL, x_j, Rbar_j, bfit, xtx, Vinv){ + mu1_mat <- matrix(bfit$mu1, ncol=1) + # var_part_tr_wERSS <- var_part_tr_wERSS + (tr(Vinv%*%bfit$S1)*xtx) + # neg_KL <- neg_KL + (bfit$logbf +0.5*(-2*tr(tcrossprod(Vinv, Rbar_j)%*%tcrossprod(matrix(x_j, ncol=1), mu1_mat))+ + # tr(Vinv%*%(bfit$S1+tcrossprod(mu1_mat)))*xtx)) + ##Equivalent to the above but more efficient + var_part_tr_wERSS <- var_part_tr_wERSS + (sum(Vinv*bfit$S1)*xtx) + neg_KL <- neg_KL + (bfit$logbf +0.5*(-2*sum(tcrossprod(Vinv, Rbar_j)*t(tcrossprod(matrix(x_j, ncol=1), mu1_mat)))+ + sum(Vinv*(bfit$S1+tcrossprod(mu1_mat)))*xtx)) + + + return(list(var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL)) +} + diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R new file mode 100644 index 0000000..bd32532 --- /dev/null +++ b/R/mr_mash_rss.R @@ -0,0 +1,598 @@ +#' @title Multiple Regression with Multivariate Adaptive Shrinkage +#' from summary data. +#' +#' @description Performs multivariate multiple regression with +#' mixture-of-normals prior. +#' +#' @param Y n x r matrix of responses. +#' +#' @param X n x p matrix of covariates. +#' +#' @param V r x r residual covariance matrix. 
+#' +#' @param S0 List of length K containing the desired r x r prior +#' covariance matrices on the regression coefficients. +#' +#' @param w0 K-vector with prior mixture weights, each associated with +#' the respective covariance matrix in \code{S0}. +#' +#' @param mu1_init p x r matrix of initial estimates of the posterior +#' mean regression coefficients. These should be on the same scale as +#' the X provided. If \code{standardize=TRUE}, mu1_init will be scaled +#' appropriately after standardizing X. +#' +#' @param convergence_criterion Criterion to use for convergence check. +#' +#' @param tol Convergence tolerance. +#' +#' @param max_iter Maximum number of iterations for the optimization +#' algorithm. +#' +#' @param update_w0 If \code{TRUE}, prior weights are updated. +#' +#' @param update_w0_method Method to update prior weights. Only EM is +#' currently supported. +#' +#' @param w0_threshold Drop mixture components with weight less than this value. +#' Components are dropped at each iteration after 15 initial iterations. +#' This is done to prevent from dropping some poetentially important +#' components prematurely. +#' +#' @param update_V if \code{TRUE}, residual covariance is updated. +#' +#' @param update_V_method Method to update residual covariance. So far, +#' "full" and "diagonal" are supported. If \code{update_V=TRUE} and V +#' is not provided by the user, this option will determine how V is +#' computed (and fixed) internally from \code{mu1_init}. +#' +#' @param compute_ELBO If \code{TRUE}, ELBO is computed. +#' +#' @param standardize If \code{TRUE}, X is "standardized" using the +#' sample means and sample standard deviations. Standardizing X +#' allows a faster implementation, but the prior has a different +#' interpretation. Coefficients and covariances are returned on the +#' original scale. +#' +#' @param version Whether to use R or C++ code to perform the +#' coordinate ascent updates. +#' +#' @param verbose If \code{TRUE}, some information about the +#' algorithm's process is printed at each iteration. +#' +#' @param e A small number to add to the diagonal elements of the +#' prior matrices to improve numerical stability of the updates. +#' +#' @param ca_update_order The order with which coordinates are +#' updated. So far, "consecutive", "decreasing_logBF", +#' "increasing_logBF" are supported. +#' +#' @param nthreads Number of RcppParallel threads to use for the +#' updates. When \code{nthreads} is \code{NA}, the default number of +#' threads is used; see +#' \code{\link[RcppParallel]{defaultNumThreads}}. This setting is +#' ignored when \code{version = "R"}. +#' +#' @return A mr.mash fit, stored as a list with some or all of the +#' following elements: +#' +#' \item{mu1}{p x r matrix of posterior means for the regression +#' coeffcients.} +#' +#' \item{S1}{r x r x p array of posterior covariances for the +#' regression coeffcients.} +#' +#' \item{w1}{p x K matrix of posterior assignment probabilities to the +#' mixture components.} +#' +#' \item{V}{r x r residual covariance matrix} +#' +#' \item{w0}{K-vector with (updated, if \code{update_w0=TRUE}) prior mixture weights, each associated with +#' the respective covariance matrix in \code{S0}}. +#' +#' \item{S0}{r x r x K array of prior covariance matrices +#' on the regression coefficients}. 
+#' +#' \item{intercept}{r-vector containing posterior mean estimate of the +#' intercept.} +#' +#' \item{fitted}{n x r matrix of fitted values.} +#' +#' \item{G}{r x r covariance matrix of fitted values.} +#' +#' \item{pve}{r-vector of proportion of variance explained by the covariates.} +#' +#' \item{ELBO}{Evidence Lower Bound (ELBO) at last iteration.} +#' +#' \item{progress}{A data frame including information regarding +#' convergence criteria at each iteration.} +#' +#' \item{converged}{\code{TRUE} or \code{FALSE}, indicating whether +#' the optimization algorithm converged to a solution within the chosen tolerance +#' level.} +#' +#' \item{Y}{n x r matrix of responses at last iteration (only relevant when missing values +#' are present in the input Y).} +#' +#' @examples +#' ###Set seed +#' set.seed(123) +#' +#' ###Simulate X and Y +#' ##Set parameters +#' n <- 1000 +#' p <- 100 +#' p_causal <- 20 +#' r <- 5 +#' +#' ###Simulate data +#' out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, +#' B_scale=1, X_cor=0, X_scale=1, V_cor=0) +#' +#' ###Split the data in training and test sets +#' Ytrain <- out$Y[-c(1:200), ] +#' Xtrain <- out$X[-c(1:200), ] +#' Ytest <- out$Y[c(1:200), ] +#' Xtest <- out$X[c(1:200), ] +#' +#' ###Specify the covariance matrices for the mixture-of-normals prior. +#' univ_sumstats <- compute_univariate_sumstats(Xtrain, Ytrain, +#' standardize=TRUE, standardize.response=FALSE) +#' grid <- autoselect.mixsd(univ_sumstats, mult=sqrt(2))^2 +#' S0 <- compute_canonical_covs(ncol(Ytrain), singletons=TRUE, +#' hetgrid=c(0, 0.25, 0.5, 0.75, 1)) +#' S0 <- expand_covs(S0, grid, zeromat=TRUE) +#' +#' ###Fit mr.mash +#' fit <- mr.mash.rss(Xtrain, Ytrain, S0, update_V=TRUE) +#' +#' # Compare the "fitted" values of Y against the true Y in the training set. +#' plot(fit$fitted,Ytrain,pch = 20,col = "darkblue",xlab = "true", +#' ylab = "fitted") +#' abline(a = 0,b = 1,col = "magenta",lty = "dotted") +#' +#' # Predict the multivariate outcomes in the test set using the fitted model. +#' Ytest_est <- predict(fit,Xtest) +#' plot(Ytest_est,Ytest,pch = 20,col = "darkblue",xlab = "true", +#' ylab = "predicted") +#' abline(a = 0,b = 1,col = "magenta",lty = "dotted") +#' +#' @importFrom stats cov +#' @importFrom RcppParallel defaultNumThreads +#' @importFrom RcppParallel setThreadOptions +#' +#' @export +#' +mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, + mu1_init=matrix(0, nrow=ncol(X), ncol=ncol(Y)), tol=1e-4, convergence_criterion=c("mu1", "ELBO"), + max_iter=5000, update_w0=TRUE, update_w0_method="EM", + w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, + update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("Rcpp", "R"), e=1e-8, + ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF"), + nthreads=as.integer(NA)) { + + if(verbose){ + tic <- Sys.time() + cat("Processing the inputs... 
") + } + + # CHECK AND PROCESS INPUTS + # ------------------------ + ###Select method to check for convergence (if not specified by user, mu1 + ###will be used) + convergence_criterion <- match.arg(convergence_criterion) + + ###Select method to update the weights (if not specified by user, EM + ###will be used) + update_w0_method <- match.arg(update_w0_method) + + ###Select method to update the residual covariance (if not specified by user, full + ###will be used) + update_V_method <- match.arg(update_V_method) + + ###Select version of the inner loop (if not specified by user, Rcpp + ###will be used) + version <- match.arg(version) + + ###Select ordering of the coordinate ascent updates (if not specified by user, + ###consecutive will be used + ca_update_order <- match.arg(ca_update_order) + + ###Initialize the RcppParallel multithreading using a pre-specified number + ###of threads, or using the default number of threads when nthreads is NA. + if(version=="Rcpp"){ + if (is.na(nthreads)) { + setThreadOptions() + nthreads <- defaultNumThreads() + } else + setThreadOptions(numThreads = nthreads) + } + + ###Check that the inputs are in the correct format + if(!is.matrix(Y)) + stop("Y must be a matrix.") + if(!is.matrix(X)) + stop("X must be a matrix.") + if(any(is.na(X))) + stop("X must not contain missing values.") + if(!is.null(V)){ + if(!is.matrix(V) || !isSymmetric(V)) + stop("V must be a symmetric matrix.") + } + if(!is.list(S0)) + stop("S0 must be a list.") + if(!is.vector(w0)) + stop("w0 must be a vector.") + if(length(w0)<2) + stop("At least 2 mixture components must be present.") + if(abs(sum(w0) - 1) > 1e-10) + stop("Elements of w0 must sum to 1.") + if(length(S0)!=length(w0)) + stop("S0 and w0 must have the same length.") + if(!is.matrix(mu1_init)) + stop("mu1_init must be a matrix.") + if(convergence_criterion=="ELBO" && !compute_ELBO) + stop("ELBO needs to be computed with convergence_criterion=\"ELBO\".") + if(ca_update_order!="consecutive" && any(is.na(Y))) + stop("ca_update_order=\"consecutive\" is the only option when Y has missing values.") + + ###Obtain dimensions needed from inputs + p <- ncol(X) + n <- nrow(X) + r <- ncol(Y) + K <- length(S0) + + # PRE-PROCESSING STEPS + # -------------------- + ###Store dimensions names of the inputs + X_colnames <- colnames(X) + Y_colnames <- colnames(Y) + Y_rownames <- rownames(Y) + + ###Add number to diagonal elements of the prior matrices (improves + ###numerical stability) + S0 <- lapply(S0, makePD, e=e) + + ###Check if Y has missing values + Y_has_missing <- any(is.na(Y)) + + ###Throw an error if Y has missing values in the univariate case + if(Y_has_missing && r==1) + stop("Y must not contain missing values in the univariate case.") + + ###Center (and, optionally, scale) X + outX <- scale_fast2(X, scale=standardize) + mux <- outX$means + if (standardize) + sx <- outX$sds + X <- outX$M + rm(outX) + + ###Center Y, if no missing value is present + if(!Y_has_missing){ + outY <- scale_fast2(Y, scale=FALSE) + muy <- outY$means + Y <- outY$M + rm(outY) + } + + ###Extract per-individual Y missingness patterns, if missing values are present + if(Y_has_missing){ + Y_miss_patterns <- extract_missing_Y_pattern(Y) + } else { + Y_miss_patterns <- NULL + } + + ###Scale mu1_init, if X is standardized + if(standardize) + mu1_init <- mu1_init*sx + + ###Initilize mu1, S1, w1, delta_mu1, delta_ELBO, delta_conv, ELBO, iterator, progress, + ###missing Ys, and intercept (i.e., muy) + mu1_t <- mu1_init + delta_mu1 <- matrix(Inf, nrow=p, ncol=r) + 
delta_ELBO <- Inf + if(convergence_criterion=="mu1") + delta_conv <- max(delta_mu1) + else if(convergence_criterion=="ELBO") + delta_conv <- delta_ELBO + ELBO <- -Inf + t <- 0 + progress <- as.data.frame(matrix(as.numeric(NA), nrow=max_iter, ncol=3)) + colnames(progress) <- c("iter", "timing", "mu1_max.diff") + if(compute_ELBO){ + progress$ELBO_diff <- as.numeric(NA) + progress$ELBO <- as.numeric(NA) + } + if(Y_has_missing){ + muy <- colMeans(Y, na.rm=TRUE) + for(l in 1:r){ + Y[is.na(Y[, l]), l] <- muy[l] + } + } + + ###Compute V, if not provided by the user + if(is.null(V)){ + if(!Y_has_missing){ + V <- compute_V_init(X, Y, mu1_init, rep(0, r), method="cov") + } else { + V <- compute_V_init(X, Y, mu1_init, muy, method="flash") + } + + if(update_V_method=="diagonal") + V <- diag(diag(V)) + } + + ###Set eps + eps <- .Machine$double.eps + + ###Precompute quantities + comps <- precompute_quants(X, V, S0, standardize, version) + if(!standardize){ + xtx <- colSums(X^2) + comps$xtx <- xtx + } + + if(Y_has_missing || compute_ELBO || !standardize) + ###Compute inverse of V (needed for imputing missing Ys, the ELBO and unstandardized X) + Vinv <- chol2inv(comps$V_chol) + else { + if(version=="R") + Vinv <- NULL + else if(version=="Rcpp") + Vinv <- matrix(0, nrow=r, ncol=r) + } + + if(compute_ELBO) + ###Compute log determinant of V (needed for the ELBO) + ldetV <- chol2ldet(comps$V_chol) + else + ldetV <- NULL + + ###Set the ordering of the coordinate ascent updates + if(ca_update_order=="consecutive"){ + update_order <- 1:p + } else if(ca_update_order=="decreasing_logBF"){ + update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, + decreasing=TRUE, eps, nthreads) + } else if(ca_update_order=="increasing_logBF"){ + update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, + decreasing=FALSE, eps, nthreads) + } + + if(!Y_has_missing){ + Y_cov <- matrix(0, nrow=r, ncol=r) + } + + if(verbose) + cat("Done!\n") + + # MAIN LOOP + # --------- + if(verbose){ + if(version=="Rcpp" && nthreads>1) + cat(sprintf("Fitting the optimization algorithm using %d RcppParallel threads... \n", nthreads)) + else + cat("Fitting the optimization algorithm... \n") + cat(" iter mu1_max.diff") + if(compute_ELBO) + cat(" ELBO_diff ELBO\n") + else + cat("\n") + } + + ###Repeat the following until convergence, or until maximum number + ###of iterations is reached. + while(delta_conv>tol){ + + ##Start timing + time1 <- proc.time() + + ##Save current estimates. + mu1_old <- mu1_t + + ##Set last value of ELBO as ELBO_old + ELBO_old <- ELBO + + ##Update iterator + t <- t+1 + + ##Compute expected Y + if(Y_has_missing){ + mu <- addtocols(X%*%mu1_t, muy) + } else { + mu <- X%*%mu1_t + } + + ##Exit loop if maximum number of iterations is reached + if(t>max_iter){ + warning("Max number of iterations reached. 
Try increasing max_iter.") + break + } + + # M-STEP + # ------ + if(t > 1){ + ##Update V if requested + if(update_V){ + V <- update_V_fun(Y, mu, var_part_ERSS, Y_cov) + if(update_V_method=="diagonal") + V <- diag(diag(V)) + + #Recompute precomputed quantities after updating V + comps <- precompute_quants(X, V, S0, standardize, version) + if(!standardize) + comps$xtx <- xtx + if(compute_ELBO || !standardize) + Vinv <- chol2inv(comps$V_chol) + if(compute_ELBO) + ldetV <- chol2ldet(comps$V_chol) + } + + ##Update w0 if requested + if(update_w0){ + w0 <- update_weights_em(w1_t) + + #Drop components with mixture weight <= w0_threshold + if(t>15 && any(w0 < w0_threshold)){ + to_keep <- which(w0 >= w0_threshold) + w0 <- w0[to_keep] + w0 <- w0/sum(w0) + S0 <- S0[to_keep] + if(length(to_keep) > 1){ + comps <- filter_precomputed_quants(comps, to_keep, standardize, version) + } else if(length(to_keep) == 1 & all((S0[[to_keep]] - (diag(nrow(S0[[to_keep]]))*e)) < eps)){ #null component is the only one left + mu1_t <- matrix(0, nrow=p, ncol=r) + S1_t <- array(0, c(r, r, p)) + w1_t <- matrix(1, nrow=p, ncol=1) + warning("Only the null component is left. Estimated coefficients are set to 0.") + break + } else { #some other component is the only one left + stop("Only one component (different from the null) left. Consider lowering w0_threshold.") + } + } + } + } + + # E-STEP + # ------ + ###Update variational parameters + ups <- mr_mash_update_general(X=X, Y=Y, mu1_t=mu1_t, mu=mu, V=V, + Vinv=Vinv, ldetV=ldetV, w0=w0, S0=S0, + precomp_quants=comps, + compute_ELBO=compute_ELBO, + standardize=standardize, + update_V=update_V, version=version, + update_order=update_order, eps=eps, + nthreads=nthreads, + Y_miss_patterns=Y_miss_patterns) + mu1_t <- ups$mu1_t + S1_t <- ups$S1_t + w1_t <- ups$w1_t + if(compute_ELBO) + ELBO <- ups$ELBO + if(update_V) + var_part_ERSS <- ups$var_part_ERSS + if(Y_has_missing){ + Y <- ups$Y + muy <- ups$muy + Y_cov <- ups$Y_cov + } + + ##End timing + time2 <- proc.time() + + ##Compute difference in mu1 and ELBO between two successive iterations, + ##and assign the requested criterion to delta_conv + delta_mu1 <- abs(mu1_t - mu1_old) + delta_ELBO <- ELBO - ELBO_old + if(convergence_criterion=="mu1") + delta_conv <- max(delta_mu1) + else if(convergence_criterion=="ELBO") + delta_conv <- delta_ELBO + + ##Update progress data.frame + progress[t, c(1:3)] <- c(t, time2["elapsed"] - time1["elapsed"], max(delta_mu1)) + if(compute_ELBO) + progress[t, c(4, 5)] <- c(delta_ELBO, ELBO) + + if(verbose){ + ##Print out useful info + cat(sprintf("%4d %9.2e", t, max(delta_mu1))) + if(compute_ELBO) + cat(sprintf(" %9.2e %0.20e\n", delta_ELBO, ELBO)) + else + cat("\n") + } + } + + ###Record convergence status + if(t>max_iter) + converged <- FALSE + else + converged <- TRUE + + if(verbose){ + cat("Done!\n") + cat("Processing the outputs... ") + } + + # POST-PROCESSING STEPS + # -------------------- + ###Compute the "fitted" values. + fitted_vals <- addtocols(X %*% mu1_t, muy) + + ###Compute covariance of fitted values and PVE + cov_fitted <- cov(fitted_vals) + var_fitted <- diag(cov_fitted) + pve <- var_fitted/(var_fitted+diag(V)) + + if(standardize){ + ###Rescale posterior means and covariance of coefficients. In the + ###context of predicting Y, this rescaling is equivalent to + ###rescaling each column j of a given matrix, Xnew, by sx[j]. 
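+    ###Concretely, this presumably amounts to dividing row j of mu1_t by
+    ###sx[j] and slice j of S1_t by sx[j]^2; an illustrative sketch only,
+    ###as the helper called below may differ in implementation:
+    # post_rescaled <- list(mu1_orig = mu1_t/sx,
+    #                       S1_orig  = sweep(S1_t, 3, sx^2, "/"))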
+ post_rescaled <- rescale_post_mean_covar_fast(mu1_t, S1_t, sx) + mu1_t <- post_rescaled$mu1_orig + S1_t <- post_rescaled$S1_orig + } + + ###Compute posterior mean estimate of intercept. Note that when + ###columns of X are standardized, the intercept should be computed + ###with respect to the *rescaled* coefficients to recover the + ###correct fitted values. This is why this is done after rescaling + ###the coefficients above. + intercept <- drop(muy - mux %*% mu1_t) + + ###Assign names to outputs dimensions + S0_names <- names(S0) + rownames(mu1_t) <- X_colnames + colnames(mu1_t) <- Y_colnames + dimnames(S1_t)[[1]] <- Y_colnames + dimnames(S1_t)[[2]] <- Y_colnames + dimnames(S1_t)[[3]] <- X_colnames + S0 <- lapply(S0, function(x){rownames(x) <- colnames(x) <- Y_colnames; return(x)}) + rownames(w1_t) <- X_colnames + colnames(w1_t) <- S0_names + names(w0) <- S0_names + rownames(V) <- Y_colnames + colnames(V) <- Y_colnames + rownames(Y) <- Y_rownames + colnames(Y) <- Y_colnames + rownames(fitted_vals) <- Y_rownames + colnames(fitted_vals) <- Y_colnames + rownames(cov_fitted) <- Y_colnames + colnames(cov_fitted) <- Y_colnames + names(pve) <- Y_colnames + names(intercept) <- Y_colnames + + ###Remove unused rows of progress + progress <- progress[rowSums(is.na(progress)) != ncol(progress), ] + + ###Return the posterior assignment probabilities (w1), the + ###posterior mean of the coefficients (mu1), and the posterior + ###covariance of the coefficients (S1), the residual covariance (V), + ###the prior weights (w0), the intercept (intercept), the fitted values (fitted), + ###and the progress data frame (progress), the prior covariance (S0), convergence + ###status, the covariance of the fitted values (G), the proportion of variance explained (pve), + ###the Evidence Lower Bound (ELBO; if computed) and imputed responses (Y; if + ###missing values were present). 
+ out <- list(mu1=mu1_t, S1=S1_t, w1=w1_t, V=V, w0=w0, S0=simplify2array_custom(S0), + intercept=intercept, fitted=fitted_vals, G=cov_fitted, pve=pve, progress=progress, + converged=converged) + if(compute_ELBO) + ###Append ELBO to the output + out$ELBO <- ELBO + if(Y_has_missing) + ###Append Y to the output + out$Y <- Y + + class(out) <- c("mr.mash", "list") + + if(verbose){ + cat("Done!\n") + toc <- Sys.time() + cat("mr.mash successfully executed in", difftime(toc, tic, units="mins"), + "minutes!\n") + } + + return(out) +} diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R new file mode 100644 index 0000000..e902d1e --- /dev/null +++ b/R/mr_mash_rss_updates.R @@ -0,0 +1,255 @@ +###Update variational parameters, expected residuals, and ELBO components with or without scaling X +inner_loop_general_R <- function(X, Rbar, mu1, V, Vinv, w0, S0, ###note: V is only needed when not scaling X + precomp_quants, standardize, compute_ELBO, update_V, + update_order, eps){ + ###Create variables to store quantities + n <- nrow(Rbar) + r <- ncol(Rbar) + p <- ncol(X) + K <- length(S0) + S1 <- array(0, c(r, r, p)) + w1 <- matrix(0, nrow=p, ncol=K) + var_part_tr_wERSS <- 0 + neg_KL <- 0 + var_part_ERSS <- matrix(0, nrow=r, ncol=r) + + ##Loop through the variables + for(j in update_order){ + + #Remove j-th effect from expected residuals + Rbar_j <- Rbar + outer(X[, j], mu1[j, ]) + + #Run Bayesian SLR + if(standardize){ + bfit <- bayes_mvr_mix_standardized_X(X[, j], Rbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, + precomp_quants$SplusS0_chol, precomp_quants$S_chol, eps) + } else { + bfit <- bayes_mvr_mix_centered_X(X[, j], Rbar_j, V, w0, S0, precomp_quants$xtx[j], Vinv, + precomp_quants$V_chol, precomp_quants$d, + precomp_quants$QtimesV_chol, eps) + } + + #Update variational parameters + mu1[j, ] <- bfit$mu1 + S1[, , j] <- bfit$S1 + w1[j, ] <- bfit$w1 + + #Compute ELBO params + if(compute_ELBO){ + if(standardize){ + xtx <- n-1 + } else { + xtx <- precomp_quants$xtx[j] + } + ELBO_parts <- compute_ELBO_terms(var_part_tr_wERSS, neg_KL, X[, j], Rbar_j, bfit, xtx, Vinv) + var_part_tr_wERSS <- ELBO_parts$var_part_tr_wERSS + neg_KL <- ELBO_parts$neg_KL + } + + #Compute V params + if(update_V){ + if(standardize){ + xtx <- n-1 + } else { + xtx <- precomp_quants$xtx[j] + } + var_part_ERSS <- compute_var_part_ERSS(var_part_ERSS, bfit, xtx) + } + + #Update expected residuals + Rbar <- Rbar_j - outer(X[, j], mu1[j, ]) + } + + ###Return output + if(compute_ELBO && update_V){ + return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL, var_part_ERSS=var_part_ERSS)) + } else if(compute_ELBO && !update_V){ + return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL)) + } else if(!compute_ELBO && update_V) { + return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1, var_part_ERSS=var_part_ERSS)) + } else { + return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1)) + } +} + +### Wrapper for the Rcpp function to update variational parameters, +### expected residuals, and ELBO components with or without scaling X. 
+# +#' @importFrom Rcpp evalCpp +#' @importFrom RcppParallel RcppParallelLibs +#' @useDynLib mr.mash.alpha +#' +inner_loop_general_Rcpp <- function(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, + eps, nthreads){ + + out <- inner_loop_general_rcpp(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, + eps, nthreads) + + ###Return output + if(compute_ELBO && update_V){ + return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, + neg_KL=out$neg_KL, var_part_ERSS=out$var_part_ERSS)) + } else if(compute_ELBO && !update_V){ + return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, + neg_KL=out$neg_KL)) + } else if(!compute_ELBO && update_V) { + return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) + } else { + return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1)) + } +} + +###Wrapper of the inner loop with R or Rcpp +inner_loop_general <- function(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, version, + update_order, eps, nthreads){ + if(version=="R"){ + out <- inner_loop_general_R(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, eps) + } else if(version=="Rcpp"){ + update_order <- as.integer(update_order-1) + out <- inner_loop_general_Rcpp(X, Rbar, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, + standardize, compute_ELBO, update_V, update_order, eps, nthreads) + } + + return(out) +} + + +###Perform one iteration of the outer loop with or without scaling X +mr_mash_update_general <- function(X, Y, mu1_t, mu, V, Vinv, ldetV, w0, S0, + precomp_quants, compute_ELBO, standardize, + update_V, version, update_order, eps, + nthreads, Y_miss_patterns){ + + + if(!is.null(Y_miss_patterns)){ + ##Impute missing Ys + outY <- impute_missing_Y(Y=Y, mu=mu, Vinv=Vinv, miss=Y_miss_patterns$miss, non_miss=Y_miss_patterns$non_miss, + version=version) + Y <- outY$Y + Y_cov <- outY$Y_cov + sum_neg_ent_Y_miss <- outY$sum_neg_ent_Y_miss + + # Update the intercept + muy <- colMeans(Y) + + ##Compute expected residuals + Rbar <- scale_fast2(Y, scale=FALSE)$M - X%*%mu1_t + + } else { + Y_cov <- NULL + sum_neg_ent_Y_miss <- 0 + + ##Compute expected residuals + Rbar <- Y - mu + } + + ##Update variational parameters, expected residuals, and ELBO components + updates <- inner_loop_general(X=X, Rbar=Rbar, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, + precomp_quants=precomp_quants, standardize=standardize, + compute_ELBO=compute_ELBO, update_V=update_V, version=version, + update_order=update_order, eps=eps, nthreads=nthreads) + mu1_t <- updates$mu1 + S1_t <- updates$S1 + w1_t <- updates$w1 + Rbar <- updates$Rbar + + out <- list(mu1_t=mu1_t, S1_t=S1_t, w1_t=w1_t) + + if(!is.null(Y_miss_patterns)){ + out$Y <- Y + out$muy <- muy + out$Y_cov <- Y_cov + } + + if(compute_ELBO && update_V){ + ##Compute ELBO + var_part_tr_wERSS <- updates$var_part_tr_wERSS + neg_KL <- updates$neg_KL + out$ELBO <- compute_ELBO_fun(Rbar=Rbar, Vinv=Vinv, ldetV=ldetV, var_part_tr_wERSS=var_part_tr_wERSS, + neg_KL=neg_KL, Y_cov=Y_cov, sum_neg_ent_Y_miss=sum_neg_ent_Y_miss) + + out$var_part_ERSS <- updates$var_part_ERSS + + } else if(compute_ELBO && !update_V){ + ##Compute ELBO + var_part_tr_wERSS <- updates$var_part_tr_wERSS + neg_KL <- updates$neg_KL + out$ELBO <- compute_ELBO_fun(Rbar=Rbar, Vinv=Vinv, ldetV=ldetV, 
var_part_tr_wERSS=var_part_tr_wERSS, + neg_KL=neg_KL, Y_cov=Y_cov, sum_neg_ent_Y_miss=sum_neg_ent_Y_miss) + + } else if(!compute_ELBO && update_V){ + out$var_part_ERSS <- updates$var_part_ERSS + } + + return(out) +} + + +###Update V +update_V_fun <- function(Y, mu, var_part_ERSS, Y_cov){ + n <- nrow(Y) + + Rbar <- Y - mu + ERSS <- crossprod(Rbar) + var_part_ERSS + Y_cov + V <- ERSS/n + + return(V) +} + + +###Update mixture weights +update_weights_em <- function(x){ + w <- colSums(x) + w <- w/sum(w) + return(w) +} + + +###Impute/update missing Y +impute_missing_Y_R <- function(Y, mu, Vinv, miss, non_miss){ + n <- nrow(Y) + r <- ncol(Y) + + Y_cov <- matrix(0, r, r) + sum_neg_ent_Y_miss <- 0 + + for (i in 1:n){ + non_miss_i <- non_miss[[i]] + miss_i <- miss[[i]] + Vinv_mo <- Vinv[miss_i, non_miss_i, drop=FALSE] + Vinv_mm <- Vinv[miss_i, miss_i, drop=FALSE] + if(any(miss_i)){ + # Compute variance + Y_cov_i <- matrix(0, r, r) + Vinv_mm_chol <- chol(Vinv_mm) + Y_cov_mm <- chol2inv(Vinv_mm_chol) + Y_cov_i[miss_i, miss_i] <- Y_cov_mm + + Y_cov <- Y_cov + Y_cov_i + + # Compute mean + Y[i, miss_i] <- mu[i, miss_i] - Y_cov_mm %*% Vinv_mo %*% (Y[i, non_miss_i] - mu[i, non_miss_i]) + + # Compute sum of the negative entropy of Y missing + #sum_neg_ent_Y_miss <- sum_neg_ent_Y_miss + (0.5 * as.numeric(determinant(1/(2*pi*exp(1))*Vinv_mm, logarithm = TRUE)$modulus)) + sum_neg_ent_Y_miss <- sum_neg_ent_Y_miss + (0.5 * (ncol(Vinv_mm_chol)*log((1/(2*pi*exp(1)))) + chol2ldet(Vinv_mm_chol))) # log(det(kA)) = r*log(k) + log(det(A)) where is the size of the matrix + } + } + + return(list(Y=Y, Y_cov=Y_cov, sum_neg_ent_Y_miss=sum_neg_ent_Y_miss)) +} + +###Wrapper of impute/update missing Y with R or Rcpp +impute_missing_Y <- function(Y, mu, Vinv, miss, non_miss, version){ + if(version=="R"){ + out <- impute_missing_Y_R(Y, mu, Vinv, miss, non_miss) + } else if(version=="Rcpp"){ + out <- impute_missing_Y_rcpp(Y, mu, Vinv, simplify2array_custom(miss), simplify2array_custom(non_miss)) + } + + return(out) +} \ No newline at end of file From dc371e5e31747745463844f6e2f64fbf66447cf3 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 3 Mar 2023 08:52:49 -0500 Subject: [PATCH 002/103] Rename file --- R/{bayes_reg_mv_ss.R => bayes_reg_mv_rss.R} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename R/{bayes_reg_mv_ss.R => bayes_reg_mv_rss.R} (100%) diff --git a/R/bayes_reg_mv_ss.R b/R/bayes_reg_mv_rss.R similarity index 100% rename from R/bayes_reg_mv_ss.R rename to R/bayes_reg_mv_rss.R From 36b4b380057e61662d7886a592dd76bb4cb40634 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 3 Mar 2023 22:12:03 -0500 Subject: [PATCH 003/103] Start modifying functions to use xtx and xtY --- R/bayes_reg_mv_rss.R | 147 ++++------------------------------------ R/mr_mash_rss_updates.R | 15 ++-- 2 files changed, 21 insertions(+), 141 deletions(-) diff --git a/R/bayes_reg_mv_rss.R b/R/bayes_reg_mv_rss.R index 3f512b3..7d827d7 100644 --- a/R/bayes_reg_mv_rss.R +++ b/R/bayes_reg_mv_rss.R @@ -1,50 +1,8 @@ -# Bayesian multivariate regression with Normal prior -# -# The outputs are: b, the least-squares estimate of the regression -# coefficients; S, the covariance of b; mu1, the posterior mean of the -# regression coefficients; S1, the posterior covariance of the -# regression coefficients; logbf, the log-Bayes factor. -bayes_mvr_ridge <- function (x, Y, V, S0) { - - # Compute the least-squares estimate and its covariance. - b <- drop(x %*% Y)/sum(x^2) - S <- V/sum(x^2) - - # Compute the log-Bayes factor. 
- # logbf <- mvtnorm::dmvnorm(x=b, sigma=(S+S0), log=T) - mvtnorm::dmvnorm(x=b, sigma=S, log=T) ##Slow - # logbf <- (log(prod(abs(Re(diag(qr(S)$qr))))) + - # - log(prod(abs(Re(diag(qr(S0+S)$qr))))) - # + dot(b,solve(S,b)) - dot(b,solve(S0 + S,b)))/2 ##Not as fast as with determinant() but more stable - logbf <- (as.numeric(determinant(S)$modulus) + - - as.numeric(determinant(S0 + S)$modulus) - + dot(b,solve(S,b)) - dot(b,solve(S0 + S,b)))/2 - - # Compute the posterior mean and covariance assuming a multivariate - # normal prior with zero mean and covariance S0. - # r <- ncol(Y) - # I <- diag(r) - # S1 <- solve(solve(S0) + solve(S)) - # S1 <- S0%*%solve(S+S0)%*%S - #Avoid inverting matrices - SplusS0_chol <- chol(S+S0) - S1 <- S0%*%backsolve(SplusS0_chol, forwardsolve(t(SplusS0_chol), S)) - # mu1 <- solve(S %*% solve(S0) + I,b) - # mu1 <- drop(S1%*%solve(S)%*%b) - #Avoid inverting matrices - S_chol <- chol(S) - mu1 <- drop(S1%*%backsolve(S_chol, forwardsolve(t(S_chol), b))) - - # Return the least-squares estimate and covariance (b, S), the posterior mean and - # covariance (mu1, S1), and the log-Bayes factor (logbf) - return(list(b = b,S = S,mu1 = mu1,S1 = S1,logbf = logbf)) -} - - # Bayesian multivariate regression with Normal prior with standardized X # # The outputs are: mu1, the posterior mean of the # regression coefficients; logbf, the log-Bayes factor. -bayes_mvr_ridge_standardized_X <- function (b, S0, S, S1, SplusS0_chol, S_chol) { +bayes_mvr_ridge_standardized_X_rss <- function (b, S0, S, S1, SplusS0_chol, S_chol) { # Compute the log-Bayes factor. logbf <- (chol2ldet(S_chol) - chol2ldet(SplusS0_chol) + @@ -69,7 +27,7 @@ bayes_mvr_ridge_standardized_X <- function (b, S0, S, S1, SplusS0_chol, S_chol) # The outputs are: mu1, the posterior mean of the # regression coefficients; S1, the posterior covariance of the # regression coefficients; logbf, the log-Bayes factor. -bayes_mvr_ridge_centered_X <- function (V, b, S, S0, xtx, Vinv, V_chol, S_chol, d, QtimesV_chol) { +bayes_mvr_ridge_centered_X_rss <- function (V, b, S, S0, xtx, Vinv, V_chol, S_chol, d, QtimesV_chol) { # Compute the log-Bayes factor. SplusS0_chol <- chol(S+S0) @@ -90,66 +48,6 @@ bayes_mvr_ridge_centered_X <- function (V, b, S, S0, xtx, Vinv, V_chol, S_chol, } -# Bayesian multivariate regression with mixture-of-normals prior -# (mixture weights w0 and covariance matrices S0) -# -# The outputs are: the log-Bayes factor (logbf), the posterior assignment probabilities -# (w1), the posterior mean of the coefficients given that all the -# coefficients are not nonzero (mu1), and the posterior covariance of -# the coefficients given that all the coefficients are not zero (S1). -bayes_mvr_mix <- function (x, Y, V, w0, S0, eps) { - - # Get the number of variables (n) and the number of mixture - # components (k). - r <- ncol(Y) - K <- length(S0) - - # Compute the Bayes factors and posterior statistics separately for - # each mixture component. - # out <- vector("list",K) - # for (k in 1:K){ - # out[[k]] <- bayes_mvr_ridge(x,Y,V,S0[[k]]) - # } - bayes_mvr_ridge_lapply <- function(i){ - bayes_mvr_ridge(x, Y, V, S0[[i]]) - } - out <- lapply(1:K, bayes_mvr_ridge_lapply) - - # Compute the posterior assignment probabilities for the latent - # indicator variable. - logbf <- sapply(out,function (x) x$logbf) - w1 <- softmax(logbf + log(w0 + eps)) - - # Compute the posterior mean (mu1_mix) and covariance (S1_mix) of the - # regression coefficients. 
- A <- matrix(0,r,r) - mu1_mix <- rep(0,r) - for (k in 1:K) { - wk <- w1[k] - muk <- out[[k]]$mu1 - Sk <- out[[k]]$S1 - mu1_mix <- mu1_mix + wk*muk - A <- A + wk*(Sk + tcrossprod(muk)) - } - S1_mix <- A - tcrossprod(mu1_mix) - ##The following code does not work in the univariate case - # muk <- t(sapply(out, function (x) x$mu1)) - # Sk <- lapply(out, function (x) x$S1) - # mu1_mix <- colSums(muk*w1) - # S1_mix <- Reduce("+", lapply(1:K, function(i){w1[i]*(Sk[[i]] + tcrossprod(muk[i, ]))})) - tcrossprod(mu1_mix) - - # Compute the log-Bayes factor for the mixture as a linear combination of the - # individual BFs foreach mixture component. - u <- max(logbf) - logbf_mix <- u + log(sum(w0 * exp(logbf - u))) - - # Return the log-Bayes factor for the mixture (logbf), the posterior assignment probabilities (w1), the - # posterior mean of the coefficients (mu1), and the posterior - # covariance of the coefficients (S1). - return(list(logbf = logbf_mix,w1 = w1,mu1 = mu1_mix,S1 = S1_mix)) -} - - # Bayesian multivariate regression with mixture-of-normals prior # (mixture weights w0 and covariance matrices S0) with standardized X. # @@ -157,26 +55,21 @@ bayes_mvr_mix <- function (x, Y, V, w0, S0, eps) { # (w1), the posterior mean of the coefficients given that all the # coefficients are not nonzero (mu1), and the posterior covariance of # the coefficients given that all the coefficients are not zero (S1). -bayes_mvr_mix_standardized_X <- function (x, Y, w0, S0, S, S1, SplusS0_chol, S_chol, eps) { +bayes_mvr_mix_standardized_X_rss <- function (n, xtY, w0, S0, S, S1, SplusS0_chol, S_chol, eps) { # Get the number of conditions (r), the number of mixture - # components (K), and the number of samples (n). - r <- ncol(Y) + # components (K). + r <- length(xtY) K <- length(S0) - n <- nrow(Y) # Compute the least-squares estimate. - b <- drop(x %*% Y)/(n-1) + b <- xtY/(n-1) # Compute the Bayes factors and posterior statistics separately for # each mixture component. - # out <- vector("list",K) - # for (k in 1:K){ - # out[[k]] <- bayes_mvr_ridge_standardized_X(b, S0[[k]], S, S1[[k]], SplusS0_chol[[k]], S_chol) - # } bayes_mvr_ridge_lapply <- function(i){ - bayes_mvr_ridge_standardized_X(b, S0[[i]], S, S1[[i]], SplusS0_chol[[i]], S_chol) + bayes_mvr_ridge_standardized_X_rss(b, S0[[i]], S, S1[[i]], SplusS0_chol[[i]], S_chol) } out <- lapply(1:K, bayes_mvr_ridge_lapply) @@ -197,12 +90,7 @@ bayes_mvr_mix_standardized_X <- function (x, Y, w0, S0, S, S1, SplusS0_chol, S_c A <- A + wk*(Sk + tcrossprod(muk)) } S1_mix <- A - tcrossprod(mu1_mix) - ##The following code does not work in the univariate case - # muk <- t(sapply(out, function (x) x$mu1)) - # Sk <- lapply(out, function (x) x$S1) - # mu1_mix <- colSums(muk*w1) - # S1_mix <- Reduce("+", lapply(1:K, function(i){w1[i]*(Sk[[i]] + tcrossprod(muk[i, ]))})) - tcrossprod(mu1_mix) - + # Compute the log-Bayes factor for the mixture as a linear combination of the # individual BFs foreach mixture component. u <- max(logbf) @@ -222,15 +110,15 @@ bayes_mvr_mix_standardized_X <- function (x, Y, w0, S0, S, S1, SplusS0_chol, S_c # (w1), the posterior mean of the coefficients given that all the # coefficients are not nonzero (mu1), and the posterior covariance of # the coefficients given that all the coefficients are not zero (S1). 
-bayes_mvr_mix_centered_X <- function (x, Y, V, w0, S0, xtx, Vinv, V_chol, d, QtimesV_chol, eps) { +bayes_mvr_mix_centered_X_rss <- function (xtY, V, w0, S0, xtx, Vinv, V_chol, d, QtimesV_chol, eps) { # Get the number of variables (n) and the number of mixture # components (k). - r <- ncol(Y) + r <- length(xtY) K <- length(S0) # Compute the least-squares estimate and covariance. - b <- drop(x %*% Y)/xtx + b <- xtY/xtx S <- V/xtx # Compute quantities needed for bayes_mvr_ridge_centered_X() @@ -238,12 +126,8 @@ bayes_mvr_mix_centered_X <- function (x, Y, V, w0, S0, xtx, Vinv, V_chol, d, Qti # Compute the Bayes factors and posterior statistics separately for # each mixture component. - # out <- vector("list",K) - # for (k in 1:K){ - # out[[k]] <- bayes_mvr_ridge_centered_X(V, b, S, S0[[k]], xtx, V_chol, d[[k]], QtimesV_chol[[k]]) - # } bayes_mvr_ridge_lapply <- function(i){ - bayes_mvr_ridge_centered_X(V, b, S, S0[[i]], xtx, Vinv, V_chol, S_chol, d[[i]], QtimesV_chol[[i]]) + bayes_mvr_ridge_centered_X_rss(V, b, S, S0[[i]], xtx, Vinv, V_chol, S_chol, d[[i]], QtimesV_chol[[i]]) } out <- lapply(1:K, bayes_mvr_ridge_lapply) @@ -264,12 +148,7 @@ bayes_mvr_mix_centered_X <- function (x, Y, V, w0, S0, xtx, Vinv, V_chol, d, Qti A <- A + wk*(Sk + tcrossprod(muk)) } S1_mix <- A - tcrossprod(mu1_mix) - ##The following code does not work in the univariate case - # muk <- t(sapply(out, function (x) x$mu1)) - # Sk <- lapply(out, function (x) x$S1) - # mu1_mix <- colSums(muk*w1) - # S1_mix <- Reduce("+", lapply(1:K, function(i){w1[i]*(Sk[[i]] + tcrossprod(muk[i, ]))})) - tcrossprod(mu1_mix) - + # Compute the log-Bayes factor for the mixture as a linear combination of the # individual BFs foreach mixture component. u <- max(logbf) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index e902d1e..d4124f6 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -1,11 +1,10 @@ ###Update variational parameters, expected residuals, and ELBO components with or without scaling X -inner_loop_general_R <- function(X, Rbar, mu1, V, Vinv, w0, S0, ###note: V is only needed when not scaling X +inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V is only needed when not scaling X precomp_quants, standardize, compute_ELBO, update_V, update_order, eps){ ###Create variables to store quantities - n <- nrow(Rbar) - r <- ncol(Rbar) - p <- ncol(X) + r <- ncol(XtXmu1) + p <- ncol(XtX) K <- length(S0) S1 <- array(0, c(r, r, p)) w1 <- matrix(0, nrow=p, ncol=K) @@ -16,15 +15,17 @@ inner_loop_general_R <- function(X, Rbar, mu1, V, Vinv, w0, S0, ###note: V is on ##Loop through the variables for(j in update_order){ + xtx <- XtX[j , j] + #Remove j-th effect from expected residuals - Rbar_j <- Rbar + outer(X[, j], mu1[j, ]) + xTRbar_j <- XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j,] #Run Bayesian SLR if(standardize){ - bfit <- bayes_mvr_mix_standardized_X(X[, j], Rbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, + bfit <- bayes_mvr_mix_standardized_X(n, xTRbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, precomp_quants$SplusS0_chol, precomp_quants$S_chol, eps) } else { - bfit <- bayes_mvr_mix_centered_X(X[, j], Rbar_j, V, w0, S0, precomp_quants$xtx[j], Vinv, + bfit <- bayes_mvr_mix_centered_X(xTRbar_j, V, w0, S0, precomp_quants$xtx[j], Vinv, precomp_quants$V_chol, precomp_quants$d, precomp_quants$QtimesV_chol, eps) } From 7eb2d3d86717890819de8cfc2737b14de596fd71 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 16:05:59 -0500 Subject: [PATCH 004/103] Keep modifying rss 
functions --- R/bayes_reg_mv_rss.R | 54 +---------- R/elbo_rss.R | 30 +++--- R/mr_mash_rss.R | 209 +++++++++++++++++++--------------------- R/mr_mash_rss_updates.R | 147 +++++++--------------------- 4 files changed, 146 insertions(+), 294 deletions(-) diff --git a/R/bayes_reg_mv_rss.R b/R/bayes_reg_mv_rss.R index 7d827d7..dab1eed 100644 --- a/R/bayes_reg_mv_rss.R +++ b/R/bayes_reg_mv_rss.R @@ -1,53 +1,3 @@ -# Bayesian multivariate regression with Normal prior with standardized X -# -# The outputs are: mu1, the posterior mean of the -# regression coefficients; logbf, the log-Bayes factor. -bayes_mvr_ridge_standardized_X_rss <- function (b, S0, S, S1, SplusS0_chol, S_chol) { - - # Compute the log-Bayes factor. - logbf <- (chol2ldet(S_chol) - chol2ldet(SplusS0_chol) + - dot(b,backsolve(S_chol, forwardsolve(t(S_chol), b))) - - dot(b,backsolve(SplusS0_chol, forwardsolve(t(SplusS0_chol), b))))/2 - - # Compute the posterior mean assuming a multivariate - # normal prior with zero mean and covariance S0. - mu1 <- drop(S1%*%backsolve(S_chol, forwardsolve(t(S_chol), b))) - - # Return the posterior mean - # (mu1), and the log-Bayes factor (logbf) - return(list(mu1 = mu1, logbf = logbf)) -} - - -# Bayesian multivariate regression with Normal prior with transformed -# xtilde = x%*%solve(chol(V)) [this calculation is not needed but useful -# to understand the derivation] that allows to precompute some -# quantities -# -# The outputs are: mu1, the posterior mean of the -# regression coefficients; S1, the posterior covariance of the -# regression coefficients; logbf, the log-Bayes factor. -bayes_mvr_ridge_centered_X_rss <- function (V, b, S, S0, xtx, Vinv, V_chol, S_chol, d, QtimesV_chol) { - - # Compute the log-Bayes factor. - SplusS0_chol <- chol(S+S0) - logbf <- (chol2ldet(S_chol) - chol2ldet(SplusS0_chol) + - dot(b,backsolve(S_chol, forwardsolve(t(S_chol), b))) - - dot(b,backsolve(SplusS0_chol, forwardsolve(t(SplusS0_chol), b))))/2 - #logbf <- dmvnorm(b, rep(0, times=length(b)), (S+S0)) - dmvnorm(b, rep(0, times=length(b)), S) - - # Compute the posterior mean assuming a multivariate - # normal prior with zero mean and covariance S0. - dx <- d/(1 + xtx*d) - A <- sqrt(dx)*QtimesV_chol - S1 <- crossprod(A) - mu1 <- drop(crossprod(A, (A %*% (Vinv %*% (xtx*b))))) - # Return the posterior mean and covariance - # (mu1, S1), and the log-Bayes factor (logbf) - return(list(mu1 = mu1, S1=S1, logbf=logbf)) -} - - # Bayesian multivariate regression with mixture-of-normals prior # (mixture weights w0 and covariance matrices S0) with standardized X. # @@ -69,7 +19,7 @@ bayes_mvr_mix_standardized_X_rss <- function (n, xtY, w0, S0, S, S1, SplusS0_cho # Compute the Bayes factors and posterior statistics separately for # each mixture component. bayes_mvr_ridge_lapply <- function(i){ - bayes_mvr_ridge_standardized_X_rss(b, S0[[i]], S, S1[[i]], SplusS0_chol[[i]], S_chol) + bayes_mvr_ridge_standardized_X(b, S0[[i]], S, S1[[i]], SplusS0_chol[[i]], S_chol) } out <- lapply(1:K, bayes_mvr_ridge_lapply) @@ -127,7 +77,7 @@ bayes_mvr_mix_centered_X_rss <- function (xtY, V, w0, S0, xtx, Vinv, V_chol, d, # Compute the Bayes factors and posterior statistics separately for # each mixture component. 
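+  # After this refactoring only summary-level inputs are used here: the
+  # least-squares estimate b = xtY/xtx, its covariance S = V/xtx, and the
+  # per-component quantities precomputed outside this function, so the
+  # individual-level x and Y are no longer needed.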
bayes_mvr_ridge_lapply <- function(i){ - bayes_mvr_ridge_centered_X_rss(V, b, S, S0[[i]], xtx, Vinv, V_chol, S_chol, d[[i]], QtimesV_chol[[i]]) + bayes_mvr_ridge_centered_X(V, b, S, S0[[i]], xtx, Vinv, V_chol, S_chol, d[[i]], QtimesV_chol[[i]]) } out <- lapply(1:K, bayes_mvr_ridge_lapply) diff --git a/R/elbo_rss.R b/R/elbo_rss.R index 6d50f46..8926c26 100644 --- a/R/elbo_rss.R +++ b/R/elbo_rss.R @@ -1,31 +1,23 @@ ###Compute ELBO from intermediate components -compute_ELBO_fun <- function(Rbar, Vinv, ldetV, var_part_tr_wERSS, neg_KL, Y_cov, sum_neg_ent_Y_miss){ - n <- nrow(Rbar) - r <- ncol(Rbar) - # tr_wERSS <- tr(Vinv%*%(crossprod(Rbar))) + var_part_tr_wERSS - tr_wERSS <- sum(Vinv*(crossprod(Rbar))) + var_part_tr_wERSS - if(is.null(Y_cov)){ - e2 <- 0 - } else { - e2 <- sum(Vinv*Y_cov) - } - - ELBO <- -log(n)/2 - (n*r)/2*log(2*pi) - n/2 * ldetV - 0.5*(tr_wERSS+e2) + neg_KL - sum_neg_ent_Y_miss +compute_ELBO_rss_fun <- function(n, YtY, XtY, XtX, mu1_t, Vinv, ldetV, var_part_tr_wERSS, neg_KL){ + r <- ncol(YtY) + + tr_wERSS <- sum(Vinv*RbartRbar) + var_part_tr_wERSS + + ELBO <- -log(n)/2 - (n*r)/2*log(2*pi) - n/2 * ldetV - 0.5*(tr_wERSS) + neg_KL return(ELBO) } ###Compute intermediate components of the ELBO -compute_ELBO_terms <- function(var_part_tr_wERSS, neg_KL, x_j, Rbar_j, bfit, xtx, Vinv){ +compute_ELBO_rss_terms <- function(var_part_tr_wERSS, neg_KL, xtRbar_j, bfit, xtx, Vinv){ mu1_mat <- matrix(bfit$mu1, ncol=1) - # var_part_tr_wERSS <- var_part_tr_wERSS + (tr(Vinv%*%bfit$S1)*xtx) - # neg_KL <- neg_KL + (bfit$logbf +0.5*(-2*tr(tcrossprod(Vinv, Rbar_j)%*%tcrossprod(matrix(x_j, ncol=1), mu1_mat))+ - # tr(Vinv%*%(bfit$S1+tcrossprod(mu1_mat)))*xtx)) - ##Equivalent to the above but more efficient + var_part_tr_wERSS <- var_part_tr_wERSS + (sum(Vinv*bfit$S1)*xtx) - neg_KL <- neg_KL + (bfit$logbf +0.5*(-2*sum(tcrossprod(Vinv, Rbar_j)*t(tcrossprod(matrix(x_j, ncol=1), mu1_mat)))+ - sum(Vinv*(bfit$S1+tcrossprod(mu1_mat)))*xtx)) + Cm <- -mu1_mat%*%xtR_j - tcrossprod(xtR_j, mu1_mat) + tcrossprod(mu1_mat)*xtx + bfit$S1*xtx + + neg_KL <- neg_KL + (bfit$logbf + 0.5*(sum(Vinv*Cm))) return(list(var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL)) } diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index bd32532..ef59b1d 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -4,9 +4,20 @@ #' @description Performs multivariate multiple regression with #' mixture-of-normals prior. #' -#' @param Y n x r matrix of responses. +#' @param Bhat p x r matrix of regression coefficients from univariate +#' simple linear regression. #' -#' @param X n x p matrix of covariates. +#' @param Shat p x r matrix of standard errors of the regression coefficients +#' from univariate simple linear regression. +#' +#' @param Z p x r matrix of Z-scores from univariate +#' simple linear regression. +#' +#' @param R p x p correlation matrix among the variables. +#' +#' @param covY r x r covariance matrix across responses. +#' +#' @param n scalar indicating the sample size. #' #' @param V r x r residual covariance matrix. 
#' @@ -162,7 +173,7 @@ #' #' @export #' -mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, +mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, mu1_init=matrix(0, nrow=ncol(X), ncol=ncol(Y)), tol=1e-4, convergence_criterion=c("mu1", "ELBO"), max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, @@ -208,12 +219,12 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, } ###Check that the inputs are in the correct format - if(!is.matrix(Y)) - stop("Y must be a matrix.") - if(!is.matrix(X)) - stop("X must be a matrix.") - if(any(is.na(X))) - stop("X must not contain missing values.") + # if(!is.matrix(Y)) + # stop("Y must be a matrix.") + # if(!is.matrix(X)) + # stop("X must be a matrix.") + # if(any(is.na(X))) + # stop("X must not contain missing values.") if(!is.null(V)){ if(!is.matrix(V) || !isSymmetric(V)) stop("V must be a symmetric matrix.") @@ -236,58 +247,39 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, stop("ca_update_order=\"consecutive\" is the only option when Y has missing values.") ###Obtain dimensions needed from inputs - p <- ncol(X) - n <- nrow(X) - r <- ncol(Y) + if(!missing(Bhat)){ + p <- nrow(Bhat) + r <- ncol(Bhat) + } else if(!missing(Z)){ + p <- nrow(Z) + r <- ncol(Z) + } else{ + stop("Z or Bhat should be provided.") + } + K <- length(S0) # PRE-PROCESSING STEPS # -------------------- + + ##Compute Z scores + if(missing(Z)){ + Z <- Bhat/Shat + } + ###Store dimensions names of the inputs - X_colnames <- colnames(X) - Y_colnames <- colnames(Y) - Y_rownames <- rownames(Y) + Z_colnames <- colnames(Z) + Z_rownames <- rownames(Z) ###Add number to diagonal elements of the prior matrices (improves ###numerical stability) S0 <- lapply(S0, makePD, e=e) - ###Check if Y has missing values - Y_has_missing <- any(is.na(Y)) - - ###Throw an error if Y has missing values in the univariate case - if(Y_has_missing && r==1) - stop("Y must not contain missing values in the univariate case.") - - ###Center (and, optionally, scale) X - outX <- scale_fast2(X, scale=standardize) - mux <- outX$means - if (standardize) - sx <- outX$sds - X <- outX$M - rm(outX) - - ###Center Y, if no missing value is present - if(!Y_has_missing){ - outY <- scale_fast2(Y, scale=FALSE) - muy <- outY$means - Y <- outY$M - rm(outY) - } - - ###Extract per-individual Y missingness patterns, if missing values are present - if(Y_has_missing){ - Y_miss_patterns <- extract_missing_Y_pattern(Y) - } else { - Y_miss_patterns <- NULL - } - ###Scale mu1_init, if X is standardized if(standardize) mu1_init <- mu1_init*sx - ###Initilize mu1, S1, w1, delta_mu1, delta_ELBO, delta_conv, ELBO, iterator, progress, - ###missing Ys, and intercept (i.e., muy) + ###Initilize mu1, S1, w1, delta_mu1, delta_ELBO, delta_conv, ELBO, iterator, progress mu1_t <- mu1_init delta_mu1 <- matrix(Inf, nrow=p, ncol=r) delta_ELBO <- Inf @@ -303,21 +295,11 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, progress$ELBO_diff <- as.numeric(NA) progress$ELBO <- as.numeric(NA) } - if(Y_has_missing){ - muy <- colMeans(Y, na.rm=TRUE) - for(l in 1:r){ - Y[is.na(Y[, l]), l] <- muy[l] - } - } - + ###Compute V, if not provided by the user if(is.null(V)){ - if(!Y_has_missing){ - V <- compute_V_init(X, Y, mu1_init, rep(0, r), method="cov") - } else { - V <- compute_V_init(X, Y, mu1_init, muy, method="flash") - } - + # How to do so with 
sumstats?? + if(update_V_method=="diagonal") V <- diag(diag(V)) } @@ -332,7 +314,7 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, comps$xtx <- xtx } - if(Y_has_missing || compute_ELBO || !standardize) + if(compute_ELBO || !standardize) ###Compute inverse of V (needed for imputing missing Ys, the ELBO and unstandardized X) Vinv <- chol2inv(comps$V_chol) else { @@ -351,16 +333,33 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, ###Set the ordering of the coordinate ascent updates if(ca_update_order=="consecutive"){ update_order <- 1:p - } else if(ca_update_order=="decreasing_logBF"){ - update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, - decreasing=TRUE, eps, nthreads) - } else if(ca_update_order=="increasing_logBF"){ - update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, - decreasing=FALSE, eps, nthreads) + } # else if(ca_update_order=="decreasing_logBF"){ + # update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, + # decreasing=TRUE, eps, nthreads) + # } else if(ca_update_order=="increasing_logBF"){ + # update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, + # decreasing=FALSE, eps, nthreads) + # } + + ##Compute pve-adjusted Z scores, if n is provided + if(!missing(n)) { + adj <- (n-1)/(Z^2 + n - 2) + Z <- sqrt(adj) * Z } - if(!Y_has_missing){ - Y_cov <- matrix(0, nrow=r, ncol=r) + ##If covariance of Y and standard errors are provided, + ##the effects are on the *original scale*. + if(!missing(covY) & !missing(Shat)){ + XTXdiag <- rowMeans(matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) * adj/(Shat^2)) + XTX <- t(R * sqrt(XTXdiag)) * sqrt(XTXdiag) + XTX <- (XTX + t(XTX))/2 + XTY <- Z * sqrt(adj) * matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) / Shat + + } else { + ##The effects are on the *standardized* X, y scale. + XTX <- R*(n-1) + XTY <- Z*sqrt(n-1) + covY <- cov2cor(V) } if(verbose) @@ -396,13 +395,6 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, ##Update iterator t <- t+1 - ##Compute expected Y - if(Y_has_missing){ - mu <- addtocols(X%*%mu1_t, muy) - } else { - mu <- X%*%mu1_t - } - ##Exit loop if maximum number of iterations is reached if(t>max_iter){ warning("Max number of iterations reached. 
Try increasing max_iter.") @@ -414,7 +406,7 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, if(t > 1){ ##Update V if requested if(update_V){ - V <- update_V_fun(Y, mu, var_part_ERSS, Y_cov) + V <- update_V_rss_fun(n, RbartRbar, var_part_ERSS) if(update_V_method=="diagonal") V <- diag(diag(V)) @@ -455,16 +447,19 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, # E-STEP # ------ + mr_mash_update_general_rss <- function(n, XtX, XtY, mu1, V, Vinv, ldetV, w0, S0, + precomp_quants, compute_ELBO, standardize, + update_V, version, update_order, eps, + nthreads) ###Update variational parameters - ups <- mr_mash_update_general(X=X, Y=Y, mu1_t=mu1_t, mu=mu, V=V, + ups <- mr_mash_update_general_rss(n=n, XtX=XtX, XtY=XtY, mu1_t=mu1_t, mu=mu, V=V, Vinv=Vinv, ldetV=ldetV, w0=w0, S0=S0, precomp_quants=comps, compute_ELBO=compute_ELBO, standardize=standardize, update_V=update_V, version=version, update_order=update_order, eps=eps, - nthreads=nthreads, - Y_miss_patterns=Y_miss_patterns) + nthreads=nthreads) mu1_t <- ups$mu1_t S1_t <- ups$S1_t w1_t <- ups$w1_t @@ -472,12 +467,7 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, ELBO <- ups$ELBO if(update_V) var_part_ERSS <- ups$var_part_ERSS - if(Y_has_missing){ - Y <- ups$Y - muy <- ups$muy - Y_cov <- ups$Y_cov - } - + ##End timing time2 <- proc.time() @@ -519,12 +509,12 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, # POST-PROCESSING STEPS # -------------------- ###Compute the "fitted" values. - fitted_vals <- addtocols(X %*% mu1_t, muy) +# fitted_vals <- addtocols(X %*% mu1_t, muy) ###Compute covariance of fitted values and PVE - cov_fitted <- cov(fitted_vals) - var_fitted <- diag(cov_fitted) - pve <- var_fitted/(var_fitted+diag(V)) +# cov_fitted <- cov(fitted_vals) +# var_fitted <- diag(cov_fitted) +# pve <- var_fitted/(var_fitted+diag(V)) if(standardize){ ###Rescale posterior means and covariance of coefficients. In the @@ -540,29 +530,27 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, ###with respect to the *rescaled* coefficients to recover the ###correct fitted values. This is why this is done after rescaling ###the coefficients above. 
- intercept <- drop(muy - mux %*% mu1_t) +# intercept <- drop(muy - mux %*% mu1_t) ###Assign names to outputs dimensions S0_names <- names(S0) - rownames(mu1_t) <- X_colnames - colnames(mu1_t) <- Y_colnames - dimnames(S1_t)[[1]] <- Y_colnames - dimnames(S1_t)[[2]] <- Y_colnames - dimnames(S1_t)[[3]] <- X_colnames - S0 <- lapply(S0, function(x){rownames(x) <- colnames(x) <- Y_colnames; return(x)}) - rownames(w1_t) <- X_colnames + rownames(mu1_t) <- Z_rownames + colnames(mu1_t) <- Z_colnames + dimnames(S1_t)[[1]] <- Z_colnames + dimnames(S1_t)[[2]] <- Z_colnames + dimnames(S1_t)[[3]] <- Z_rownames + S0 <- lapply(S0, function(x){rownames(x) <- colnames(x) <- Z_colnames; return(x)}) + rownames(w1_t) <- Z_rownames colnames(w1_t) <- S0_names names(w0) <- S0_names - rownames(V) <- Y_colnames - colnames(V) <- Y_colnames - rownames(Y) <- Y_rownames - colnames(Y) <- Y_colnames - rownames(fitted_vals) <- Y_rownames - colnames(fitted_vals) <- Y_colnames - rownames(cov_fitted) <- Y_colnames - colnames(cov_fitted) <- Y_colnames - names(pve) <- Y_colnames - names(intercept) <- Y_colnames + rownames(V) <- Z_colnames + colnames(V) <- Z_colnames + # rownames(fitted_vals) <- Y_rownames + # colnames(fitted_vals) <- Y_colnames + # rownames(cov_fitted) <- Y_colnames + # colnames(cov_fitted) <- Y_colnames + # names(pve) <- Y_colnames + # names(intercept) <- Y_colnames ###Remove unused rows of progress progress <- progress[rowSums(is.na(progress)) != ncol(progress), ] @@ -576,14 +564,11 @@ mr.mash.rss <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, ###the Evidence Lower Bound (ELBO; if computed) and imputed responses (Y; if ###missing values were present). out <- list(mu1=mu1_t, S1=S1_t, w1=w1_t, V=V, w0=w0, S0=simplify2array_custom(S0), - intercept=intercept, fitted=fitted_vals, G=cov_fitted, pve=pve, progress=progress, + progress=progress, #intercept=intercept, fitted=fitted_vals, G=cov_fitted, pve=pve, converged=converged) if(compute_ELBO) ###Append ELBO to the output out$ELBO <- ELBO - if(Y_has_missing) - ###Append Y to the output - out$Y <- Y class(out) <- c("mr.mash", "list") diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index d4124f6..06f0562 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -15,17 +15,21 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V ##Loop through the variables for(j in update_order){ - xtx <- XtX[j , j] + if(standardize){ + xtx <- n-1 + } else { + xtx <- precomp_quants$xtx[j] + } #Remove j-th effect from expected residuals - xTRbar_j <- XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j,] + xtRbar_j <- XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j,] #Run Bayesian SLR if(standardize){ - bfit <- bayes_mvr_mix_standardized_X(n, xTRbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, + bfit <- bayes_mvr_mix_standardized_X(n, xtRbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, precomp_quants$SplusS0_chol, precomp_quants$S_chol, eps) } else { - bfit <- bayes_mvr_mix_centered_X(xTRbar_j, V, w0, S0, precomp_quants$xtx[j], Vinv, + bfit <- bayes_mvr_mix_centered_X(xtRbar_j, V, w0, S0, xtx, Vinv, precomp_quants$V_chol, precomp_quants$d, precomp_quants$QtimesV_chol, eps) } @@ -37,11 +41,6 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V #Compute ELBO params if(compute_ELBO){ - if(standardize){ - xtx <- n-1 - } else { - xtx <- precomp_quants$xtx[j] - } ELBO_parts <- compute_ELBO_terms(var_part_tr_wERSS, neg_KL, X[, j], Rbar_j, bfit, xtx, Vinv) var_part_tr_wERSS <- ELBO_parts$var_part_tr_wERSS neg_KL 
<- ELBO_parts$neg_KL @@ -49,16 +48,8 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V #Compute V params if(update_V){ - if(standardize){ - xtx <- n-1 - } else { - xtx <- precomp_quants$xtx[j] - } var_part_ERSS <- compute_var_part_ERSS(var_part_ERSS, bfit, xtx) } - - #Update expected residuals - Rbar <- Rbar_j - outer(X[, j], mu1[j, ]) } ###Return output @@ -80,38 +71,38 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V #' @importFrom RcppParallel RcppParallelLibs #' @useDynLib mr.mash.alpha #' -inner_loop_general_Rcpp <- function(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, +inner_loop_general_Rcpp <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads){ - out <- inner_loop_general_rcpp(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_rcpp(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads) ###Return output if(compute_ELBO && update_V){ - return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, neg_KL=out$neg_KL, var_part_ERSS=out$var_part_ERSS)) } else if(compute_ELBO && !update_V){ - return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, neg_KL=out$neg_KL)) } else if(!compute_ELBO && update_V) { - return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) } else { - return(list(Rbar=out$Rbar, mu1=out$mu1, S1=out$S1, w1=out$w1)) + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1)) } } ###Wrapper of the inner loop with R or Rcpp -inner_loop_general <- function(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, +inner_loop_general_rss <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, version, update_order, eps, nthreads){ if(version=="R"){ - out <- inner_loop_general_R(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_R(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps) } else if(version=="Rcpp"){ update_order <- as.integer(update_order-1) - out <- inner_loop_general_Rcpp(X, Rbar, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, + out <- inner_loop_general_Rcpp(n, XtXmu1, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads) } @@ -120,36 +111,18 @@ inner_loop_general <- function(X, Rbar, mu1, V, Vinv, w0, S0, precomp_quants, ###Perform one iteration of the outer loop with or without scaling X -mr_mash_update_general <- function(X, Y, mu1_t, mu, V, Vinv, ldetV, w0, S0, - precomp_quants, compute_ELBO, standardize, - update_V, version, update_order, eps, - nthreads, Y_miss_patterns){ +mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S0, + precomp_quants, compute_ELBO, standardize, + update_V, version, update_order, eps, + nthreads){ - if(!is.null(Y_miss_patterns)){ - ##Impute missing Ys - outY <- impute_missing_Y(Y=Y, mu=mu, Vinv=Vinv, miss=Y_miss_patterns$miss, non_miss=Y_miss_patterns$non_miss, - version=version) - Y <- outY$Y - Y_cov <- outY$Y_cov 
- sum_neg_ent_Y_miss <- outY$sum_neg_ent_Y_miss - - # Update the intercept - muy <- colMeans(Y) - - ##Compute expected residuals - Rbar <- scale_fast2(Y, scale=FALSE)$M - X%*%mu1_t - - } else { - Y_cov <- NULL - sum_neg_ent_Y_miss <- 0 - - ##Compute expected residuals - Rbar <- Y - mu - } + + ##Compute ?? + XtXmu1 <- XtX%*%mu1_t ##Update variational parameters, expected residuals, and ELBO components - updates <- inner_loop_general(X=X, Rbar=Rbar, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, + updates <- inner_loop_rss_general(n=n, XtXmu1=XtXmu1, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, precomp_quants=precomp_quants, standardize=standardize, compute_ELBO=compute_ELBO, update_V=update_V, version=version, update_order=update_order, eps=eps, nthreads=nthreads) @@ -160,30 +133,30 @@ mr_mash_update_general <- function(X, Y, mu1_t, mu, V, Vinv, ldetV, w0, S0, out <- list(mu1_t=mu1_t, S1_t=S1_t, w1_t=w1_t) - if(!is.null(Y_miss_patterns)){ - out$Y <- Y - out$muy <- muy - out$Y_cov <- Y_cov + if(compute_ELBO || update_V){ + RbartRbar <- YtY - crossprod(mu1_t, XtY) - crossprod(XtY, mu1_t) + crossprod(mu1_t, XtX)%*%mu1_t } if(compute_ELBO && update_V){ ##Compute ELBO var_part_tr_wERSS <- updates$var_part_tr_wERSS neg_KL <- updates$neg_KL - out$ELBO <- compute_ELBO_fun(Rbar=Rbar, Vinv=Vinv, ldetV=ldetV, var_part_tr_wERSS=var_part_tr_wERSS, - neg_KL=neg_KL, Y_cov=Y_cov, sum_neg_ent_Y_miss=sum_neg_ent_Y_miss) + out$ELBO <- compute_ELBO_rss_fun(n=n, RbartRbar=RbartRbar, mu1_t=mu1_t, Vinv=Vinv, ldetV=ldetV, + var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL) out$var_part_ERSS <- updates$var_part_ERSS + out$RbartRbar } else if(compute_ELBO && !update_V){ ##Compute ELBO var_part_tr_wERSS <- updates$var_part_tr_wERSS neg_KL <- updates$neg_KL - out$ELBO <- compute_ELBO_fun(Rbar=Rbar, Vinv=Vinv, ldetV=ldetV, var_part_tr_wERSS=var_part_tr_wERSS, - neg_KL=neg_KL, Y_cov=Y_cov, sum_neg_ent_Y_miss=sum_neg_ent_Y_miss) + out$ELBO <- compute_ELBO_rss_fun(n=n, RbartRbar=RbartRbar, mu1_t=mu1_t, Vinv=Vinv, ldetV=ldetV, + var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL) } else if(!compute_ELBO && update_V){ out$var_part_ERSS <- updates$var_part_ERSS + out$RbartRbar } return(out) @@ -191,11 +164,9 @@ mr_mash_update_general <- function(X, Y, mu1_t, mu, V, Vinv, ldetV, w0, S0, ###Update V -update_V_fun <- function(Y, mu, var_part_ERSS, Y_cov){ - n <- nrow(Y) - - Rbar <- Y - mu - ERSS <- crossprod(Rbar) + var_part_ERSS + Y_cov +update_V_rss_fun <- function(n, RbartRbar, var_part_ERSS){ + + ERSS <- RbartRbar + var_part_ERSS V <- ERSS/n return(V) @@ -208,49 +179,3 @@ update_weights_em <- function(x){ w <- w/sum(w) return(w) } - - -###Impute/update missing Y -impute_missing_Y_R <- function(Y, mu, Vinv, miss, non_miss){ - n <- nrow(Y) - r <- ncol(Y) - - Y_cov <- matrix(0, r, r) - sum_neg_ent_Y_miss <- 0 - - for (i in 1:n){ - non_miss_i <- non_miss[[i]] - miss_i <- miss[[i]] - Vinv_mo <- Vinv[miss_i, non_miss_i, drop=FALSE] - Vinv_mm <- Vinv[miss_i, miss_i, drop=FALSE] - if(any(miss_i)){ - # Compute variance - Y_cov_i <- matrix(0, r, r) - Vinv_mm_chol <- chol(Vinv_mm) - Y_cov_mm <- chol2inv(Vinv_mm_chol) - Y_cov_i[miss_i, miss_i] <- Y_cov_mm - - Y_cov <- Y_cov + Y_cov_i - - # Compute mean - Y[i, miss_i] <- mu[i, miss_i] - Y_cov_mm %*% Vinv_mo %*% (Y[i, non_miss_i] - mu[i, non_miss_i]) - - # Compute sum of the negative entropy of Y missing - #sum_neg_ent_Y_miss <- sum_neg_ent_Y_miss + (0.5 * as.numeric(determinant(1/(2*pi*exp(1))*Vinv_mm, logarithm = TRUE)$modulus)) - sum_neg_ent_Y_miss <- sum_neg_ent_Y_miss + (0.5 * 
(ncol(Vinv_mm_chol)*log((1/(2*pi*exp(1)))) + chol2ldet(Vinv_mm_chol))) # log(det(kA)) = r*log(k) + log(det(A)) where is the size of the matrix - } - } - - return(list(Y=Y, Y_cov=Y_cov, sum_neg_ent_Y_miss=sum_neg_ent_Y_miss)) -} - -###Wrapper of impute/update missing Y with R or Rcpp -impute_missing_Y <- function(Y, mu, Vinv, miss, non_miss, version){ - if(version=="R"){ - out <- impute_missing_Y_R(Y, mu, Vinv, miss, non_miss) - } else if(version=="Rcpp"){ - out <- impute_missing_Y_rcpp(Y, mu, Vinv, simplify2array_custom(miss), simplify2array_custom(non_miss)) - } - - return(out) -} \ No newline at end of file From c2269cecb3157ed25875d8af375c008ad38f9c03 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:20:27 -0500 Subject: [PATCH 005/103] Small change --- R/misc.R | 3 +-- R/mr_mash.R | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/misc.R b/R/misc.R index e7a42e8..eac1f9c 100644 --- a/R/misc.R +++ b/R/misc.R @@ -94,9 +94,8 @@ makePD <- function(S0, e){ } ###Precompute quantities in any case -precompute_quants <- function(X, V, S0, standardize, version){ +precompute_quants <- function(n, V, S0, standardize, version){ if(standardize){ - n <- nrow(X) xtx <- n-1 ###Quantities that don't depend on S0 diff --git a/R/mr_mash.R b/R/mr_mash.R index 520bf30..ef5f946 100644 --- a/R/mr_mash.R +++ b/R/mr_mash.R @@ -325,7 +325,7 @@ mr.mash <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, eps <- .Machine$double.eps ###Precompute quantities - comps <- precompute_quants(X, V, S0, standardize, version) + comps <- precompute_quants(n, V, S0, standardize, version) if(!standardize){ xtx <- colSums(X^2) comps$xtx <- xtx @@ -418,7 +418,7 @@ mr.mash <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, V <- diag(diag(V)) #Recompute precomputed quantities after updating V - comps <- precompute_quants(X, V, S0, standardize, version) + comps <- precompute_quants(n, V, S0, standardize, version) if(!standardize) comps$xtx <- xtx if(compute_ELBO || !standardize) From 054e380213936e64ace17733da74409c687e182e Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:22:39 -0500 Subject: [PATCH 006/103] More changes to rss functions --- R/mr_mash_rss.R | 63 +++++++++++++++++++++-------------------- R/mr_mash_rss_updates.R | 63 ++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 63 deletions(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index ef59b1d..8026d0d 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -174,10 +174,10 @@ #' @export #' mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, - mu1_init=matrix(0, nrow=ncol(X), ncol=ncol(Y)), tol=1e-4, convergence_criterion=c("mu1", "ELBO"), + mu1_init=matrix(0, nrow=nrow(Z), ncol=ncol(Z)), tol=1e-4, convergence_criterion=c("mu1", "ELBO"), max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, - update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("Rcpp", "R"), e=1e-8, + update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("R", "Rcpp"), e=1e-8, ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF"), nthreads=as.integer(NA)) { @@ -243,8 +243,8 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le stop("mu1_init must be a matrix.") if(convergence_criterion=="ELBO" && !compute_ELBO) stop("ELBO needs to be computed with convergence_criterion=\"ELBO\".") - if(ca_update_order!="consecutive" 
&& any(is.na(Y))) - stop("ca_update_order=\"consecutive\" is the only option when Y has missing values.") + if(ca_update_order!="consecutive") + stop("ca_update_order=\"consecutive\" is the only option with summary data for now.") ###Obtain dimensions needed from inputs if(!missing(Bhat)){ @@ -306,16 +306,36 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ###Set eps eps <- .Machine$double.eps + + ##Compute pve-adjusted Z scores, if n is provided + if(!missing(n)) { + adj <- (n-1)/(Z^2 + n - 2) + Z <- sqrt(adj) * Z + } + + ##If covariance of Y and standard errors are provided, + ##the effects are on the *original scale*. + if(!missing(covY) & !missing(Shat)){ + XtXdiag <- rowMeans(matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) * adj/(Shat^2)) + XtX <- t(R * sqrt(XtXdiag)) * sqrt(XtXdiag) + XtX <- (XtX + t(XtX))/2 + XtY <- Z * sqrt(adj) * matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) / Shat + + } else { + ##The effects are on the *standardized* X, y scale. + XtX <- R*(n-1) + XtY <- Z*sqrt(n-1) + covY <- cov2cor(V) + } ###Precompute quantities - comps <- precompute_quants(X, V, S0, standardize, version) + comps <- precompute_quants(n, V, S0, standardize, version) if(!standardize){ - xtx <- colSums(X^2) - comps$xtx <- xtx + comps$xtx <- diag(XtX) } if(compute_ELBO || !standardize) - ###Compute inverse of V (needed for imputing missing Ys, the ELBO and unstandardized X) + ###Compute inverse of V (needed for the ELBO and unstandardized X) Vinv <- chol2inv(comps$V_chol) else { if(version=="R") @@ -341,27 +361,6 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le # decreasing=FALSE, eps, nthreads) # } - ##Compute pve-adjusted Z scores, if n is provided - if(!missing(n)) { - adj <- (n-1)/(Z^2 + n - 2) - Z <- sqrt(adj) * Z - } - - ##If covariance of Y and standard errors are provided, - ##the effects are on the *original scale*. - if(!missing(covY) & !missing(Shat)){ - XTXdiag <- rowMeans(matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) * adj/(Shat^2)) - XTX <- t(R * sqrt(XTXdiag)) * sqrt(XTXdiag) - XTX <- (XTX + t(XTX))/2 - XTY <- Z * sqrt(adj) * matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) / Shat - - } else { - ##The effects are on the *standardized* X, y scale. - XTX <- R*(n-1) - XTY <- Z*sqrt(n-1) - covY <- cov2cor(V) - } - if(verbose) cat("Done!\n") @@ -411,9 +410,9 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le V <- diag(diag(V)) #Recompute precomputed quantities after updating V - comps <- precompute_quants(X, V, S0, standardize, version) + comps <- precompute_quants(n, V, S0, standardize, version) if(!standardize) - comps$xtx <- xtx + comps$xtx <- diag(XtX) if(compute_ELBO || !standardize) Vinv <- chol2inv(comps$V_chol) if(compute_ELBO) @@ -467,6 +466,8 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ELBO <- ups$ELBO if(update_V) var_part_ERSS <- ups$var_part_ERSS + if(compute_ELBO || update_V) + RbartRbar <- ups$RbartRbar ##End timing time2 <- proc.time() diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 06f0562..6256ff8 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -67,31 +67,31 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V ### Wrapper for the Rcpp function to update variational parameters, ### expected residuals, and ELBO components with or without scaling X. 
# -#' @importFrom Rcpp evalCpp -#' @importFrom RcppParallel RcppParallelLibs -#' @useDynLib mr.mash.alpha -#' -inner_loop_general_Rcpp <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, - standardize, compute_ELBO, update_V, update_order, - eps, nthreads){ - - out <- inner_loop_general_rcpp(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, - standardize, compute_ELBO, update_V, update_order, - eps, nthreads) - - ###Return output - if(compute_ELBO && update_V){ - return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, - neg_KL=out$neg_KL, var_part_ERSS=out$var_part_ERSS)) - } else if(compute_ELBO && !update_V){ - return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, - neg_KL=out$neg_KL)) - } else if(!compute_ELBO && update_V) { - return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) - } else { - return(list(mu1=out$mu1, S1=out$S1, w1=out$w1)) - } -} +# #' @importFrom Rcpp evalCpp +# #' @importFrom RcppParallel RcppParallelLibs +# #' @useDynLib mr.mash.alpha +# #' +# inner_loop_general_Rcpp <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, +# standardize, compute_ELBO, update_V, update_order, +# eps, nthreads){ +# +# out <- inner_loop_general_rcpp(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, +# standardize, compute_ELBO, update_V, update_order, +# eps, nthreads) +# +# ###Return output +# if(compute_ELBO && update_V){ +# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, +# neg_KL=out$neg_KL, var_part_ERSS=out$var_part_ERSS)) +# } else if(compute_ELBO && !update_V){ +# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, +# neg_KL=out$neg_KL)) +# } else if(!compute_ELBO && update_V) { +# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) +# } else { +# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1)) +# } +# } ###Wrapper of the inner loop with R or Rcpp inner_loop_general_rss <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, @@ -100,12 +100,12 @@ inner_loop_general_rss <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quan if(version=="R"){ out <- inner_loop_general_R(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps) - } else if(version=="Rcpp"){ - update_order <- as.integer(update_order-1) - out <- inner_loop_general_Rcpp(n, XtXmu1, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, - standardize, compute_ELBO, update_V, update_order, eps, nthreads) - } - + } # else if(version=="Rcpp"){ + # update_order <- as.integer(update_order-1) + # out <- inner_loop_general_Rcpp(n, XtXmu1, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, + # standardize, compute_ELBO, update_V, update_order, eps, nthreads) + # } + # return(out) } @@ -129,7 +129,6 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S mu1_t <- updates$mu1 S1_t <- updates$S1 w1_t <- updates$w1 - Rbar <- updates$Rbar out <- list(mu1_t=mu1_t, S1_t=S1_t, w1_t=w1_t) From 6b30309c9b2dddf3c5c09a5060bffc94ac17cba5 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:24:33 -0500 Subject: [PATCH 007/103] Bump up version --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1f6fcfa..6e92508 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,14 +1,15 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.2-25 -Date: 2022-12-21 
+Version: 0.2-26 +Date: 2023-03-06 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. URL: https://github.com/stephenslab/mr.mash.alpha Authors@R: c(person("Fabio","Morgante",role=c("cre","aut"), email="fabiom@clemson.edu"), + person("Deborah","Kunkel",role="aut"), person("Peter","Carbonetto",role="aut"), person("Matthew","Stephens",role="aut")) License: MIT + file LICENSE From 0aeac28b531479725c803a0cd2bf6f9499c7cdfe Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:31:13 -0500 Subject: [PATCH 008/103] Fix inputs bug --- R/elbo_rss.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/elbo_rss.R b/R/elbo_rss.R index 8926c26..fcd8923 100644 --- a/R/elbo_rss.R +++ b/R/elbo_rss.R @@ -1,6 +1,6 @@ ###Compute ELBO from intermediate components -compute_ELBO_rss_fun <- function(n, YtY, XtY, XtX, mu1_t, Vinv, ldetV, var_part_tr_wERSS, neg_KL){ - r <- ncol(YtY) +compute_ELBO_rss_fun <- function(n, RbartRbar, Vinv, ldetV, var_part_tr_wERSS, neg_KL){ + r <- ncol(RbartRbar) tr_wERSS <- sum(Vinv*RbartRbar) + var_part_tr_wERSS From 1c4134f7b6f52ed357e5aa699918082382dcda1e Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:31:53 -0500 Subject: [PATCH 009/103] Run devtools::document() --- NAMESPACE | 1 + man/mr.mash.rss.Rd | 205 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 206 insertions(+) create mode 100644 man/mr.mash.rss.Rd diff --git a/NAMESPACE b/NAMESPACE index d4ad215..a909fd9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,6 +9,7 @@ export(compute_data_driven_covs) export(compute_univariate_sumstats) export(expand_covs) export(mr.mash) +export(mr.mash.rss) export(predict.mr.mash) export(simulate_mr_mash_data) importFrom(MBSP,matrix_normal) diff --git a/man/mr.mash.rss.Rd b/man/mr.mash.rss.Rd new file mode 100644 index 0000000..6787c13 --- /dev/null +++ b/man/mr.mash.rss.Rd @@ -0,0 +1,205 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mr_mash_rss.R +\name{mr.mash.rss} +\alias{mr.mash.rss} +\title{Multiple Regression with Multivariate Adaptive Shrinkage + from summary data.} +\usage{ +mr.mash.rss( + Bhat, + Shat, + Z, + R, + covY, + n, + S0, + w0 = rep(1/(length(S0)), length(S0)), + V = NULL, + mu1_init = matrix(0, nrow = nrow(Z), ncol = ncol(Z)), + tol = 1e-04, + convergence_criterion = c("mu1", "ELBO"), + max_iter = 5000, + update_w0 = TRUE, + update_w0_method = "EM", + w0_threshold = 0, + compute_ELBO = TRUE, + standardize = TRUE, + verbose = TRUE, + update_V = FALSE, + update_V_method = c("full", "diagonal"), + version = c("R", "Rcpp"), + e = 1e-08, + ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF"), + nthreads = as.integer(NA) +) +} +\arguments{ +\item{Bhat}{p x r matrix of regression coefficients from univariate +simple linear regression.} + +\item{Shat}{p x r matrix of standard errors of the regression coefficients +from univariate simple linear regression.} + +\item{Z}{p x r matrix of Z-scores from univariate +simple linear regression.} + +\item{R}{p x p correlation matrix among the variables.} + +\item{covY}{r x r covariance matrix across responses.} + +\item{n}{scalar indicating the sample size.} + +\item{S0}{List of length K containing the desired r x r prior +covariance matrices on the regression coefficients.} + +\item{w0}{K-vector with prior mixture weights, each associated with +the respective covariance matrix in \code{S0}.} + 
+\item{V}{r x r residual covariance matrix.} + +\item{mu1_init}{p x r matrix of initial estimates of the posterior +mean regression coefficients. These should be on the same scale as +the X provided. If \code{standardize=TRUE}, mu1_init will be scaled +appropriately after standardizing X.} + +\item{tol}{Convergence tolerance.} + +\item{convergence_criterion}{Criterion to use for convergence check.} + +\item{max_iter}{Maximum number of iterations for the optimization +algorithm.} + +\item{update_w0}{If \code{TRUE}, prior weights are updated.} + +\item{update_w0_method}{Method to update prior weights. Only EM is +currently supported.} + +\item{w0_threshold}{Drop mixture components with weight less than this value. +Components are dropped at each iteration after 15 initial iterations. +This is done to prevent from dropping some poetentially important +components prematurely.} + +\item{compute_ELBO}{If \code{TRUE}, ELBO is computed.} + +\item{standardize}{If \code{TRUE}, X is "standardized" using the +sample means and sample standard deviations. Standardizing X +allows a faster implementation, but the prior has a different +interpretation. Coefficients and covariances are returned on the +original scale.} + +\item{verbose}{If \code{TRUE}, some information about the +algorithm's process is printed at each iteration.} + +\item{update_V}{if \code{TRUE}, residual covariance is updated.} + +\item{update_V_method}{Method to update residual covariance. So far, +"full" and "diagonal" are supported. If \code{update_V=TRUE} and V +is not provided by the user, this option will determine how V is +computed (and fixed) internally from \code{mu1_init}.} + +\item{version}{Whether to use R or C++ code to perform the +coordinate ascent updates.} + +\item{e}{A small number to add to the diagonal elements of the +prior matrices to improve numerical stability of the updates.} + +\item{ca_update_order}{The order with which coordinates are +updated. So far, "consecutive", "decreasing_logBF", +"increasing_logBF" are supported.} + +\item{nthreads}{Number of RcppParallel threads to use for the +updates. When \code{nthreads} is \code{NA}, the default number of +threads is used; see +\code{\link[RcppParallel]{defaultNumThreads}}. This setting is +ignored when \code{version = "R"}.} +} +\value{ +A mr.mash fit, stored as a list with some or all of the +following elements: + +\item{mu1}{p x r matrix of posterior means for the regression + coeffcients.} + +\item{S1}{r x r x p array of posterior covariances for the + regression coeffcients.} + +\item{w1}{p x K matrix of posterior assignment probabilities to the + mixture components.} + +\item{V}{r x r residual covariance matrix} + +\item{w0}{K-vector with (updated, if \code{update_w0=TRUE}) prior mixture weights, each associated with + the respective covariance matrix in \code{S0}}. + +\item{S0}{r x r x K array of prior covariance matrices + on the regression coefficients}. 
+ +\item{intercept}{r-vector containing posterior mean estimate of the + intercept.} + +\item{fitted}{n x r matrix of fitted values.} + +\item{G}{r x r covariance matrix of fitted values.} + +\item{pve}{r-vector of proportion of variance explained by the covariates.} + +\item{ELBO}{Evidence Lower Bound (ELBO) at last iteration.} + +\item{progress}{A data frame including information regarding + convergence criteria at each iteration.} + +\item{converged}{\code{TRUE} or \code{FALSE}, indicating whether + the optimization algorithm converged to a solution within the chosen tolerance + level.} + +\item{Y}{n x r matrix of responses at last iteration (only relevant when missing values + are present in the input Y).} +} +\description{ +Performs multivariate multiple regression with + mixture-of-normals prior. +} +\examples{ +###Set seed +set.seed(123) + +###Simulate X and Y +##Set parameters +n <- 1000 +p <- 100 +p_causal <- 20 +r <- 5 + +###Simulate data +out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, + B_scale=1, X_cor=0, X_scale=1, V_cor=0) + +###Split the data in training and test sets +Ytrain <- out$Y[-c(1:200), ] +Xtrain <- out$X[-c(1:200), ] +Ytest <- out$Y[c(1:200), ] +Xtest <- out$X[c(1:200), ] + +###Specify the covariance matrices for the mixture-of-normals prior. +univ_sumstats <- compute_univariate_sumstats(Xtrain, Ytrain, + standardize=TRUE, standardize.response=FALSE) +grid <- autoselect.mixsd(univ_sumstats, mult=sqrt(2))^2 +S0 <- compute_canonical_covs(ncol(Ytrain), singletons=TRUE, + hetgrid=c(0, 0.25, 0.5, 0.75, 1)) +S0 <- expand_covs(S0, grid, zeromat=TRUE) + +###Fit mr.mash +fit <- mr.mash.rss(Xtrain, Ytrain, S0, update_V=TRUE) + +# Compare the "fitted" values of Y against the true Y in the training set. +plot(fit$fitted,Ytrain,pch = 20,col = "darkblue",xlab = "true", + ylab = "fitted") +abline(a = 0,b = 1,col = "magenta",lty = "dotted") + +# Predict the multivariate outcomes in the test set using the fitted model. 
+Ytest_est <- predict(fit,Xtest) +plot(Ytest_est,Ytest,pch = 20,col = "darkblue",xlab = "true", + ylab = "predicted") +abline(a = 0,b = 1,col = "magenta",lty = "dotted") + +} From affbea6a6951fd6460e726c5b5b41bb7bfdb4f81 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:32:20 -0500 Subject: [PATCH 010/103] Bump up version and add author --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6e92508..ed12c9e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -36,4 +36,4 @@ Remotes: stephenslab/ebnm, stephenslab/mashr, willwerscheid/flashier -RoxygenNote: 7.2.2 +RoxygenNote: 7.2.3 From 859ae7e67b3c092c98c156b0e9a7955848a35fcf Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:34:26 -0500 Subject: [PATCH 011/103] Fix bug --- R/mr_mash_rss_updates.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 6256ff8..f134629 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -140,7 +140,7 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S ##Compute ELBO var_part_tr_wERSS <- updates$var_part_tr_wERSS neg_KL <- updates$neg_KL - out$ELBO <- compute_ELBO_rss_fun(n=n, RbartRbar=RbartRbar, mu1_t=mu1_t, Vinv=Vinv, ldetV=ldetV, + out$ELBO <- compute_ELBO_rss_fun(n=n, RbartRbar=RbartRbar, Vinv=Vinv, ldetV=ldetV, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL) out$var_part_ERSS <- updates$var_part_ERSS @@ -150,7 +150,7 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S ##Compute ELBO var_part_tr_wERSS <- updates$var_part_tr_wERSS neg_KL <- updates$neg_KL - out$ELBO <- compute_ELBO_rss_fun(n=n, RbartRbar=RbartRbar, mu1_t=mu1_t, Vinv=Vinv, ldetV=ldetV, + out$ELBO <- compute_ELBO_rss_fun(n=n, RbartRbar=RbartRbar, Vinv=Vinv, ldetV=ldetV, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL) } else if(!compute_ELBO && update_V){ From cef442bedfbd79ae9602152c90e8f2e07d9556ce Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:46:43 -0500 Subject: [PATCH 012/103] Fiz issue with order of arguments --- R/mr_mash_rss.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 8026d0d..57b6e94 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -174,7 +174,7 @@ #' @export #' mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, - mu1_init=matrix(0, nrow=nrow(Z), ncol=ncol(Z)), tol=1e-4, convergence_criterion=c("mu1", "ELBO"), + mu1_init, tol=1e-4, convergence_criterion=c("mu1", "ELBO"), max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("R", "Rcpp"), e=1e-8, @@ -239,7 +239,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le stop("Elements of w0 must sum to 1.") if(length(S0)!=length(w0)) stop("S0 and w0 must have the same length.") - if(!is.matrix(mu1_init)) + if(!missing(mu1_init) && !is.matrix(mu1_init)) stop("mu1_init must be a matrix.") if(convergence_criterion=="ELBO" && !compute_ELBO) stop("ELBO needs to be computed with convergence_criterion=\"ELBO\".") @@ -275,6 +275,11 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ###numerical stability) S0 <- lapply(S0, makePD, e=e) + ##Initialize regression coefficients to 0 if not provided + if(missing(mu1_init)){ + 
mu1_init <- matrix(0, nrow=p, ncol=r) + } + ###Scale mu1_init, if X is standardized if(standardize) mu1_init <- mu1_init*sx From 9611a25a3477e7e932e2884433d1c158e8d00ffc Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:50:48 -0500 Subject: [PATCH 013/103] Remove junk code --- R/mr_mash_rss.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 57b6e94..096daf9 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -451,10 +451,6 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le # E-STEP # ------ - mr_mash_update_general_rss <- function(n, XtX, XtY, mu1, V, Vinv, ldetV, w0, S0, - precomp_quants, compute_ELBO, standardize, - update_V, version, update_order, eps, - nthreads) ###Update variational parameters ups <- mr_mash_update_general_rss(n=n, XtX=XtX, XtY=XtY, mu1_t=mu1_t, mu=mu, V=V, Vinv=Vinv, ldetV=ldetV, w0=w0, S0=S0, From de3ce2459ec9b7f3cda71bac24313a98b1b4016c Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:52:31 -0500 Subject: [PATCH 014/103] Remove junk code 2 --- R/mr_mash_rss.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 096daf9..fe613b5 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -452,7 +452,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le # E-STEP # ------ ###Update variational parameters - ups <- mr_mash_update_general_rss(n=n, XtX=XtX, XtY=XtY, mu1_t=mu1_t, mu=mu, V=V, + ups <- mr_mash_update_general_rss(n=n, XtX=XtX, XtY=XtY, mu1_t=mu1_t, V=V, Vinv=Vinv, ldetV=ldetV, w0=w0, S0=S0, precomp_quants=comps, compute_ELBO=compute_ELBO, From 5028f1e032e307f1e465ce28a9a04c95dd554be6 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:54:15 -0500 Subject: [PATCH 015/103] Fix naming issue --- R/mr_mash_rss_updates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index f134629..7f05379 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -122,7 +122,7 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S XtXmu1 <- XtX%*%mu1_t ##Update variational parameters, expected residuals, and ELBO components - updates <- inner_loop_rss_general(n=n, XtXmu1=XtXmu1, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, + updates <- inner_loop_general_rss(n=n, XtXmu1=XtXmu1, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, precomp_quants=precomp_quants, standardize=standardize, compute_ELBO=compute_ELBO, update_V=update_V, version=version, update_order=update_order, eps=eps, nthreads=nthreads) From 773ba284ccd29c22d5e51b2fac2151670734b7f3 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 18:57:38 -0500 Subject: [PATCH 016/103] Fix bug --- R/mr_mash_rss_updates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 7f05379..a838367 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -4,7 +4,7 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V update_order, eps){ ###Create variables to store quantities r <- ncol(XtXmu1) - p <- ncol(XtX) + p <- nrow(mu1) K <- length(S0) S1 <- array(0, c(r, r, p)) w1 <- matrix(0, nrow=p, ncol=K) From 8a0f740db570af5b948e76adae0aca0b9d672d9c Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:04:26 -0500 Subject: [PATCH 017/103] Fix bug --- R/mr_mash_rss_updates.R | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index a838367..ad1b2de 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -98,7 +98,7 @@ inner_loop_general_rss <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quan standardize, compute_ELBO, update_V, version, update_order, eps, nthreads){ if(version=="R"){ - out <- inner_loop_general_R(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_rss_R(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps) } # else if(version=="Rcpp"){ # update_order <- as.integer(update_order-1) From 737e9bf9363dad726c96add607fa1ab684ce988d Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:09:37 -0500 Subject: [PATCH 018/103] Fix another bug --- R/mr_mash_rss_updates.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index ad1b2de..031d1fe 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -41,7 +41,7 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V #Compute ELBO params if(compute_ELBO){ - ELBO_parts <- compute_ELBO_terms(var_part_tr_wERSS, neg_KL, X[, j], Rbar_j, bfit, xtx, Vinv) + ELBO_parts <- compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, xtRbar_j, bfit, xtx, Vinv) var_part_tr_wERSS <- ELBO_parts$var_part_tr_wERSS neg_KL <- ELBO_parts$neg_KL } @@ -54,13 +54,13 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V ###Return output if(compute_ELBO && update_V){ - return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL, var_part_ERSS=var_part_ERSS)) + return(list(mu1=mu1, S1=S1, w1=w1, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL, var_part_ERSS=var_part_ERSS)) } else if(compute_ELBO && !update_V){ - return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL)) + return(list(mu1=mu1, S1=S1, w1=w1, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL)) } else if(!compute_ELBO && update_V) { - return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1, var_part_ERSS=var_part_ERSS)) + return(list(mu1=mu1, S1=S1, w1=w1, var_part_ERSS=var_part_ERSS)) } else { - return(list(Rbar=Rbar, mu1=mu1, S1=S1, w1=w1)) + return(list(mu1=mu1, S1=S1, w1=w1)) } } From 518be0da4032c6086a28c8876dd4fec5cc7d2e9b Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:11:59 -0500 Subject: [PATCH 019/103] Fix yet another bug --- R/mr_mash_rss_updates.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 031d1fe..947d22b 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -1,5 +1,5 @@ ###Update variational parameters, expected residuals, and ELBO components with or without scaling X -inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V is only needed when not scaling X +inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V is only needed when not scaling X precomp_quants, standardize, compute_ELBO, update_V, update_order, eps){ ###Create variables to store quantities @@ -94,11 +94,11 @@ inner_loop_general_rss_R <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, ###note: V # } ###Wrapper of the inner loop with R or Rcpp -inner_loop_general_rss <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, +inner_loop_general_rss <- function(n, XtY, XtXmu1, mu1, V, Vinv, 
w0, S0, precomp_quants, standardize, compute_ELBO, update_V, version, update_order, eps, nthreads){ if(version=="R"){ - out <- inner_loop_general_rss_R(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_rss_R(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps) } # else if(version=="Rcpp"){ # update_order <- as.integer(update_order-1) @@ -122,7 +122,7 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S XtXmu1 <- XtX%*%mu1_t ##Update variational parameters, expected residuals, and ELBO components - updates <- inner_loop_general_rss(n=n, XtXmu1=XtXmu1, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, + updates <- inner_loop_general_rss(n=n, XtY=XtY, XtXmu1=XtXmu1, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, precomp_quants=precomp_quants, standardize=standardize, compute_ELBO=compute_ELBO, update_V=update_V, version=version, update_order=update_order, eps=eps, nthreads=nthreads) From d1bb9a5d456c35354e42e9f8f13007ff93e94c71 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:20:32 -0500 Subject: [PATCH 020/103] Try fixing a bug --- R/mr_mash_rss_updates.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 947d22b..937c52c 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -22,7 +22,7 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no } #Remove j-th effect from expected residuals - xtRbar_j <- XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j,] + xtRbar_j <- matrix(XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j, ], nrow=1) #Run Bayesian SLR if(standardize){ From 37cba8e47d98afdc40bb778682a5b099aaaef1aa Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:30:41 -0500 Subject: [PATCH 021/103] Fix bug --- R/mr_mash_rss_updates.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 937c52c..752dba7 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -3,7 +3,7 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no precomp_quants, standardize, compute_ELBO, update_V, update_order, eps){ ###Create variables to store quantities - r <- ncol(XtXmu1) + r <- ncol(mu1) p <- nrow(mu1) K <- length(S0) S1 <- array(0, c(r, r, p)) @@ -22,14 +22,14 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no } #Remove j-th effect from expected residuals - xtRbar_j <- matrix(XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j, ], nrow=1) + xtRbar_j <- XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j, ] #Run Bayesian SLR if(standardize){ - bfit <- bayes_mvr_mix_standardized_X(n, xtRbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, + bfit <- bayes_mvr_mix_standardized_X_rss(n, xtRbar_j, w0, S0, precomp_quants$S, precomp_quants$S1, precomp_quants$SplusS0_chol, precomp_quants$S_chol, eps) } else { - bfit <- bayes_mvr_mix_centered_X(xtRbar_j, V, w0, S0, xtx, Vinv, + bfit <- bayes_mvr_mix_centered_X_rss(xtRbar_j, V, w0, S0, xtx, Vinv, precomp_quants$V_chol, precomp_quants$d, precomp_quants$QtimesV_chol, eps) } From e0d4548b77fb5a2b7303c58d9acafd05716d5244 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:36:36 -0500 Subject: [PATCH 022/103] Fix bug --- R/elbo_rss.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/elbo_rss.R b/R/elbo_rss.R index fcd8923..2bfcd8f 100644 --- a/R/elbo_rss.R +++ b/R/elbo_rss.R @@ -15,7 +15,7 @@ compute_ELBO_rss_terms 
<- function(var_part_tr_wERSS, neg_KL, xtRbar_j, bfit, xt var_part_tr_wERSS <- var_part_tr_wERSS + (sum(Vinv*bfit$S1)*xtx) - Cm <- -mu1_mat%*%xtR_j - tcrossprod(xtR_j, mu1_mat) + tcrossprod(mu1_mat)*xtx + bfit$S1*xtx + Cm <- -mu1_mat%*%xtRbar_j - tcrossprod(xtRbar_j, mu1_mat) + tcrossprod(mu1_mat)*xtx + bfit$S1*xtx neg_KL <- neg_KL + (bfit$logbf + 0.5*(sum(Vinv*Cm))) From c8a2ebe51996e5f65b5cd7998c48d270cd2ea893 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:42:06 -0500 Subject: [PATCH 023/103] Add YtY --- R/mr_mash_rss.R | 17 +++++++++-------- R/mr_mash_rss_updates.R | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index fe613b5..ef539d6 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -452,14 +452,15 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le # E-STEP # ------ ###Update variational parameters - ups <- mr_mash_update_general_rss(n=n, XtX=XtX, XtY=XtY, mu1_t=mu1_t, V=V, - Vinv=Vinv, ldetV=ldetV, w0=w0, S0=S0, - precomp_quants=comps, - compute_ELBO=compute_ELBO, - standardize=standardize, - update_V=update_V, version=version, - update_order=update_order, eps=eps, - nthreads=nthreads) + ups <- mr_mash_update_general_rss(n=n, XtX=XtX, XtY=XtY, YtY=YtY, + mu1_t=mu1_t, V=V, + Vinv=Vinv, ldetV=ldetV, w0=w0, S0=S0, + precomp_quants=comps, + compute_ELBO=compute_ELBO, + standardize=standardize, + update_V=update_V, version=version, + update_order=update_order, eps=eps, + nthreads=nthreads) mu1_t <- ups$mu1_t S1_t <- ups$S1_t w1_t <- ups$w1_t diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 752dba7..5cc9cd0 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -111,7 +111,7 @@ inner_loop_general_rss <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp ###Perform one iteration of the outer loop with or without scaling X -mr_mash_update_general_rss <- function(n, XtX, XtY, mu1_t, V, Vinv, ldetV, w0, S0, +mr_mash_update_general_rss <- function(n, XtX, XtY, YtY, mu1_t, V, Vinv, ldetV, w0, S0, precomp_quants, compute_ELBO, standardize, update_V, version, update_order, eps, nthreads){ From 024fedbeaaf7b2fcbd0d2cb23939d8a2488e29a2 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:44:14 -0500 Subject: [PATCH 024/103] Add YtY --- R/mr_mash_rss.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index ef539d6..3923be7 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -332,6 +332,8 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le XtY <- Z*sqrt(n-1) covY <- cov2cor(V) } + + YtY <- covY*(n-1) ###Precompute quantities comps <- precompute_quants(n, V, S0, standardize, version) From 47ea0f1ab2abaa9e1c71466849430b354032fe4b Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:46:54 -0500 Subject: [PATCH 025/103] Fix missing part --- R/mr_mash_rss_updates.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 5cc9cd0..2c31e55 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -144,7 +144,7 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, YtY, mu1_t, V, Vinv, ldetV, var_part_tr_wERSS=var_part_tr_wERSS, neg_KL=neg_KL) out$var_part_ERSS <- updates$var_part_ERSS - out$RbartRbar + out$RbartRbar <- RbartRbar } else if(compute_ELBO && !update_V){ ##Compute ELBO @@ -155,7 +155,7 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, YtY, mu1_t, V, Vinv, ldetV, } else 
if(!compute_ELBO && update_V){ out$var_part_ERSS <- updates$var_part_ERSS - out$RbartRbar + out$RbartRbar <- RbartRbar } return(out) From 53a92da8edceebc19e9fdb60e89c587e98243b46 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 6 Mar 2023 19:49:15 -0500 Subject: [PATCH 026/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ed12c9e..981cbef 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.2-26 +Version: 0.2-27 Date: 2023-03-06 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From c18728fec672ebd277f4098d54500665803ef680 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 8 Mar 2023 18:44:04 -0500 Subject: [PATCH 027/103] Add case of standardized X --- R/mr_mash_rss.R | 54 ++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 3923be7..ba07d2b 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -267,6 +267,37 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le Z <- Bhat/Shat } + ##Compute pve-adjusted Z scores, if n is provided + if(!missing(n)) { + adj <- (n-1)/(Z^2 + n - 2) + Z <- sqrt(adj) * Z + } + + ##If covariance of Y and standard errors are provided, + ##the effects are on the *original scale*. + if(!missing(covY) & !missing(Shat)){ + XtXdiag <- rowMeans(matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) * adj/(Shat^2)) + XtX <- t(R * sqrt(XtXdiag)) * sqrt(XtXdiag) + XtX <- (XtX + t(XtX))/2 + XtY <- Z * sqrt(adj) * matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) / Shat + + } else { + ##The effects are on the *standardized* X, y scale. + XtX <- R*(n-1) + XtY <- Z*sqrt(n-1) + covY <- cov2cor(V) + } + + YtY <- covY*(n-1) + + if(standardize){ + dXtX <- diag(XtX) + sx <- sqrt(dXtX/(n-1)) + sx[sx == 0] <- 1 + XtX <- t((1/sx) * XtX) / sx + XtY < XtY / sx + } + ###Store dimensions names of the inputs Z_colnames <- colnames(Z) Z_rownames <- rownames(Z) @@ -312,29 +343,6 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ###Set eps eps <- .Machine$double.eps - ##Compute pve-adjusted Z scores, if n is provided - if(!missing(n)) { - adj <- (n-1)/(Z^2 + n - 2) - Z <- sqrt(adj) * Z - } - - ##If covariance of Y and standard errors are provided, - ##the effects are on the *original scale*. - if(!missing(covY) & !missing(Shat)){ - XtXdiag <- rowMeans(matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) * adj/(Shat^2)) - XtX <- t(R * sqrt(XtXdiag)) * sqrt(XtXdiag) - XtX <- (XtX + t(XtX))/2 - XtY <- Z * sqrt(adj) * matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) / Shat - - } else { - ##The effects are on the *standardized* X, y scale. 
- XtX <- R*(n-1) - XtY <- Z*sqrt(n-1) - covY <- cov2cor(V) - } - - YtY <- covY*(n-1) - ###Precompute quantities comps <- precompute_quants(n, V, S0, standardize, version) if(!standardize){ From 709c6a3f9124436e2d1b60b18459e044bff1a7b5 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 9 Mar 2023 10:03:21 -0500 Subject: [PATCH 028/103] Fix bug --- R/mr_mash_rss.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index ba07d2b..a3bfca9 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -295,7 +295,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le sx <- sqrt(dXtX/(n-1)) sx[sx == 0] <- 1 XtX <- t((1/sx) * XtX) / sx - XtY < XtY / sx + XtY <- XtY / sx } ###Store dimensions names of the inputs From d3c7d9633d19ea8b26ec3d25c3811ef9e759517b Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 9 Mar 2023 10:10:04 -0500 Subject: [PATCH 029/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 981cbef..2d6c558 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.2-27 -Date: 2023-03-06 +Version: 0.2-28 +Date: 2023-03-09 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. From 061cd45c477e9e645234cfe805003ae6d57f6e08 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Tue, 14 Mar 2023 14:52:04 -0400 Subject: [PATCH 030/103] Add code to perform the inner loop in Rcpp --- R/mr_mash_rss_updates.R | 64 +++++++-------- src/bayes_reg_mv.cpp | 119 +++++++++++++++++++++++++++ src/bayes_reg_mv.h | 73 ++++++++++++----- src/misc.cpp | 26 ++++++ src/misc.h | 4 + src/mr_mash_rss_updates.cpp | 158 ++++++++++++++++++++++++++++++++++++ 6 files changed, 390 insertions(+), 54 deletions(-) create mode 100644 src/mr_mash_rss_updates.cpp diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 2c31e55..acd901d 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -66,32 +66,32 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no ### Wrapper for the Rcpp function to update variational parameters, ### expected residuals, and ELBO components with or without scaling X. 
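# A small check, on a toy matrix, of the rescaling used in the `standardize`
# branch of mr.mash.rss above (illustration only, not package code):
# t((1/sx) * XtX) / sx equals diag(1/sx) %*% XtX %*% diag(1/sx), i.e. X'X after
# scaling each column of X by 1/sx.
A_toy  <- crossprod(matrix(rnorm(30), 10, 3))
sx_toy <- sqrt(diag(A_toy)/(10 - 1))
max(abs(t((1/sx_toy) * A_toy)/sx_toy - diag(1/sx_toy) %*% A_toy %*% diag(1/sx_toy)))  # ~0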
-# -# #' @importFrom Rcpp evalCpp -# #' @importFrom RcppParallel RcppParallelLibs -# #' @useDynLib mr.mash.alpha -# #' -# inner_loop_general_Rcpp <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, -# standardize, compute_ELBO, update_V, update_order, -# eps, nthreads){ -# -# out <- inner_loop_general_rcpp(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, -# standardize, compute_ELBO, update_V, update_order, -# eps, nthreads) -# -# ###Return output -# if(compute_ELBO && update_V){ -# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, -# neg_KL=out$neg_KL, var_part_ERSS=out$var_part_ERSS)) -# } else if(compute_ELBO && !update_V){ -# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, -# neg_KL=out$neg_KL)) -# } else if(!compute_ELBO && update_V) { -# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) -# } else { -# return(list(mu1=out$mu1, S1=out$S1, w1=out$w1)) -# } -# } + +#' @importFrom Rcpp evalCpp +#' @importFrom RcppParallel RcppParallelLibs +#' @useDynLib mr.mash.alpha +#' +inner_loop_general_rss_Rcpp <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, + eps, nthreads){ + + out <- inner_loop_general_rss_rcpp(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, + eps, nthreads) + + ###Return output + if(compute_ELBO && update_V){ + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, + neg_KL=out$neg_KL, var_part_ERSS=out$var_part_ERSS)) + } else if(compute_ELBO && !update_V){ + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_tr_wERSS=out$var_part_tr_wERSS, + neg_KL=out$neg_KL)) + } else if(!compute_ELBO && update_V) { + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1, var_part_ERSS=out$var_part_ERSS)) + } else { + return(list(mu1=out$mu1, S1=out$S1, w1=out$w1)) + } +} ###Wrapper of the inner loop with R or Rcpp inner_loop_general_rss <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, @@ -100,12 +100,12 @@ inner_loop_general_rss <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp if(version=="R"){ out <- inner_loop_general_rss_R(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps) - } # else if(version=="Rcpp"){ - # update_order <- as.integer(update_order-1) - # out <- inner_loop_general_Rcpp(n, XtXmu1, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, - # standardize, compute_ELBO, update_V, update_order, eps, nthreads) - # } - # + } else if(version=="Rcpp"){ + update_order <- as.integer(update_order-1) + out <- inner_loop_general_rss_Rcpp(n, XtY, XtXmu1, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, + standardize, compute_ELBO, update_V, update_order, eps, nthreads) + } + return(out) } diff --git a/src/bayes_reg_mv.cpp b/src/bayes_reg_mv.cpp index 9081781..c660229 100644 --- a/src/bayes_reg_mv.cpp +++ b/src/bayes_reg_mv.cpp @@ -319,3 +319,122 @@ double bayes_mvr_mix_centered_X (const vec& x, const mat& Y, const mat& V, double u = max(logbfmix); return u + log(sum(exp(logbfmix - u))); } + + +// Perform Bayesian multivariate simple regression with summary data +// and mixture-of-normals prior with standardized x. 
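// Unlike bayes_mvr_mix_standardized_X() above, this summary-statistic ("rss")
// version takes the precomputed inner product x'Y for one predictor (xtY) in
// place of x and Y themselves; with standardized x, x'x = n-1, so the
// least-squares estimate below is recovered as b = xtY/(n-1).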
+double bayes_mvr_mix_standardized_X_rss (unsigned int n, const vec& xtY, const vec& w0, + const cube& S0, const mat& S, + const cube& S1, const cube& SplusS0_chol, + const mat& S_chol, double eps, + unsigned int nthreads, vec& mu1_mix, + mat& S1_mix, vec& w1) { + unsigned int k = w0.n_elem; + unsigned int r = xtY.n_elem; + + mat mu1mix(r,k); + vec logbfmix(k); + vec mu1(r); + + // Compute the least-squares estimate. + vec b = xtY/(n-1); + + // Compute the quantities separately for each mixture component. + if (nthreads > 1) { + bayes_mvr_mix_standardized_X_worker worker(b,S,S_chol,S0,S1,SplusS0_chol, + logbfmix,mu1mix); + parallelFor(0,k,worker); + } else { + for (unsigned int i = 0; i < k; i++) { + logbfmix(i) = bayes_mvr_ridge_standardized_X(b,S0.slice(i),S,S1.slice(i), + SplusS0_chol.slice(i), + S_chol,mu1); + mu1mix.col(i) = mu1; + } + } + + // Compute the posterior assignment probabilities for the latent + // indicator variable. + logbfmix += log(w0 + eps); + w1 = logbfmix; + softmax(w1); + + // Compute the posterior mean (mu1) and covariance (S1_mix) of the + // regression coefficients. + S1_mix.fill(0); + mu1_mix.fill(0); + for (unsigned int i = 0; i < k; i++) { + b = mu1mix.col(i); + mu1_mix += w1(i) * b; + S1_mix += w1(i) * (S1.slice(i) + b * trans(b)); + } + S1_mix -= mu1_mix * trans(mu1_mix); + + // Compute the log-Bayes factor as a linear combination of the + // individual Bayes factors for each mixture component. + double u = max(logbfmix); + return u + log(sum(exp(logbfmix - u))); +} + +// Perform Bayesian multivariate simple regression with summary data +// and mixture-of-normals prior with centered x. +double bayes_mvr_mix_centered_X_rss (const vec& xtY, const mat& V, + const vec& w0, const cube& S0, double xtx, + const mat& Vinv, const mat& V_chol, + const mat& d, const cube& QtimesV_chol, + double eps, unsigned int nthreads, + vec& mu1_mix, mat& S1_mix, vec& w1) { + unsigned int k = w0.n_elem; + unsigned int r = xtY.n_elem; + + mat mu1mix(r,k); + cube S1mix(r,r,k); + vec logbfmix(k); + vec mu1(r); + mat S1(r,r); + + // Compute the least-squares estimate. + vec b = xtY/xtx; + mat S = V/xtx; + + // Compute quantities needed for bayes_mvr_ridge_centered_X() + mat S_chol = V_chol/sqrt(xtx); + + // Compute the quantities separately for each mixture component. + if (nthreads > 1) { + bayes_mvr_mix_centered_X_worker worker(b,xtx,V,Vinv,V_chol,S,S_chol,d, + S0,QtimesV_chol,logbfmix,mu1mix, + S1mix); + parallelFor(0,k,worker); + } else { + for (unsigned int i = 0; i < k; i++) { + logbfmix(i) = bayes_mvr_ridge_centered_X(V,b,S,S0.slice(i),xtx,Vinv, + V_chol,S_chol,d.col(i), + QtimesV_chol.slice(i),mu1,S1); + mu1mix.col(i) = mu1; + S1mix.slice(i) = S1; + } + } + + // Compute the posterior assignment probabilities for the latent + // indicator variable. + logbfmix += log(w0 + eps); + w1 = logbfmix; + softmax(w1); + + // Compute the posterior mean (mu1) and covariance (S1_mix) of the + // regression coefficients. + S1_mix.fill(0); + mu1_mix.fill(0); + for (unsigned int i = 0; i < k; i++) { + b = mu1mix.col(i); + mu1_mix += w1(i) * b; + S1_mix += w1(i) * (S1mix.slice(i) + b * trans(b)); + } + S1_mix -= mu1_mix * trans(mu1_mix); + + // Compute the log-Bayes factor as a linear combination of the + // individual Bayes factors for each mixture component. 
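// (The maximum is subtracted and added back, the usual log-sum-exp trick, to
// avoid overflow/underflow when exponentiating large log-Bayes factors; the
// prior weights w0 were already folded into logbfmix via log(w0 + eps) above.)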
+ double u = max(logbfmix); + return u + log(sum(exp(logbfmix - u))); +} diff --git a/src/bayes_reg_mv.h b/src/bayes_reg_mv.h index 41c2c27..0b9f319 100644 --- a/src/bayes_reg_mv.h +++ b/src/bayes_reg_mv.h @@ -17,33 +17,62 @@ double bayes_mvr_ridge_centered_X (const arma::mat& V, const arma::vec& b, const arma::vec& mu1, arma::mat& S1); double bayes_mvr_mix_standardized_X (const arma::vec& x, - const arma::mat& Y, - const arma::vec& w0, - const arma::cube& S0, - const arma::mat& S, - const arma::cube& S1, - const arma::cube& SplusS0_chol, - const arma::mat& S_chol, - double eps, - unsigned int nthreads, - arma::vec& mu1_mix, - arma::mat& S1_mix, - arma::vec& w1); + const arma::mat& Y, + const arma::vec& w0, + const arma::cube& S0, + const arma::mat& S, + const arma::cube& S1, + const arma::cube& SplusS0_chol, + const arma::mat& S_chol, + double eps, + unsigned int nthreads, + arma::vec& mu1_mix, + arma::mat& S1_mix, + arma::vec& w1); double bayes_mvr_mix_centered_X (const arma::vec& x, - const arma::mat& Y, - const arma::mat& V, + const arma::mat& Y, + const arma::mat& V, const arma::vec& w0, - const arma::cube& S0, - double xtx, + const arma::cube& S0, + double xtx, const arma::mat& Vinv, - const arma::mat& V_chol, + const arma::mat& V_chol, const arma::mat& d, - const arma::cube& QtimesV_chol, - double eps, - unsigned int nthreads, + const arma::cube& QtimesV_chol, + double eps, + unsigned int nthreads, arma::vec& mu1_mix, - arma::mat& S1_mix, - arma::vec& w1); + arma::mat& S1_mix, + arma::vec& w1); + +double bayes_mvr_mix_standardized_X_rss (unsigned int n, + const arma::vec& xtY, + const arma::vec& w0, + const arma::cube& S0, + const arma::mat& S, + const arma::cube& S1, + const arma::cube& SplusS0_chol, + const arma::mat& S_chol, + double eps, + unsigned int nthreads, + arma::vec& mu1_mix, + arma::mat& S1_mix, + arma::vec& w1); + +double bayes_mvr_mix_centered_X_rss (const arma::vec& xtY, + const arma::mat& V, + const arma::vec& w0, + const arma::cube& S0, + double xtx, + const arma::mat& Vinv, + const arma::mat& V_chol, + const arma::mat& d, + const arma::cube& QtimesV_chol, + double eps, + unsigned int nthreads, + arma::vec& mu1_mix, + arma::mat& S1_mix, + arma::vec& w1); #endif diff --git a/src/misc.cpp b/src/misc.cpp index c324190..3fb851b 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -87,3 +87,29 @@ void compute_var_part_ERSS (mat& var_part_ERSS, const mat& S1, double xtx){ // // return var_part_ERSS; // } + +// Function to compute some terms of the ELBO with summary data +void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, const mat& XtRbar_j, + double logbf, const mat& mu1, const mat& S1, + double xtx, const mat& Vinv){ + + var_part_tr_wERSS += (as_scalar(accu(Vinv % S1))*xtx); + + neg_KL += (logbf + as_scalar(0.5*accu(Vinv % (-mu1 * XtRbar_j - XtRbar_j * trans(mu1) + mu1 * trans(mu1) * xtx + S1*xtx)))); +} + +// // [[Rcpp::plugins("cpp11")]] +// // [[Rcpp::depends(RcppArmadillo)]] +// // [[Rcpp::export]] +// List compute_ELBO_terms_rcpp (double var_part_tr_wERSS_init, double neg_KL_init, double x_j, +// const mat& rbar_j, double logbf, const mat& mu1, const mat& S1, +// double xtx, const mat& Vinv) { +// +// double var_part_tr_wERSS = var_part_tr_wERSS_init; +// double neg_KL = neg_KL_init; +// +// compute_ELBO_terms(var_part_tr_wERSS, neg_KL, x_j, rbar_j, logbf, mu1, S1, xtx, Vinv); +// +// return List::create(Named("var_part_tr_wERSS") = var_part_tr_wERSS, +// Named("neg_KL") = neg_KL); +// } diff --git a/src/misc.h b/src/misc.h index 2857c77..953df43 
100644 --- a/src/misc.h +++ b/src/misc.h @@ -31,6 +31,10 @@ void compute_ELBO_terms (double& var_part_tr_wERSS, double& neg_KL, const arma:: const arma::mat& rbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, double xtx, const arma::mat& Vinv); +void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, + const arma::mat& XtRbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, + double xtx, const arma::mat& Vinv); + void compute_var_part_ERSS (arma::mat& var_part_ERSS, const arma::mat& S1, double xtx); diff --git a/src/mr_mash_rss_updates.cpp b/src/mr_mash_rss_updates.cpp new file mode 100644 index 0000000..0752d7b --- /dev/null +++ b/src/mr_mash_rss_updates.cpp @@ -0,0 +1,158 @@ +#include "bayes_reg_mv.h" +#include "misc.h" +#include +#include + +using namespace Rcpp; +using namespace arma; + +// TYPE DEFINITIONS +// ---------------- + +// A list of precomputed quantities that are invariant to any updates +// to the mr-mash model parameters. +struct mr_mash_precomputed_quantities { + const mat S; + const mat S_chol; + const cube S1; + const cube SplusS0_chol; + const mat V_chol; + const mat d; + const cube QtimesV_chol; + const vec xtx; + + // This is used to create a mr_mash_precomputed_quantities object. + mr_mash_precomputed_quantities (const mat& S, const mat& S_chol, + const cube& S1, const cube& SplusS0_chol, + const mat& V_chol, const mat& d, const cube& QtimesV_chol, + const vec& xtx) : + S(S), S_chol(S_chol), S1(S1), SplusS0_chol(SplusS0_chol), + V_chol(V_chol), d(d), QtimesV_chol(QtimesV_chol), xtx(xtx) { }; +}; + + +// FUNCTION DECLARATIONS +// --------------------- + +// Inner loop +void inner_loop_general_rss (unsigned int n, const mat& XtY, mat& XtXmu1, mat& mu1, const mat& V, + const mat& Vinv, const vec& w0, const cube& S0, + const mr_mash_precomputed_quantities& precomp_quants, + bool standardize, bool compute_ELBO, bool update_V, + const vec& update_order, double eps, unsigned int nthreads, + cube& S1, mat& w1, double& var_part_tr_wERSS, + double& neg_KL, mat& var_part_ERSS); + + +// FUNCTION DEFINITIONS +// -------------------- + +// Inner loop +// +// [[Rcpp::depends(RcppArmadillo)]] +// [[Rcpp::depends(RcppParallel)]] +// [[Rcpp::export]] +List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtY, arma::mat& XtXmu1, arma::mat& mu1, + const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, + const arma::cube& S0, const List& precomp_quants_list, + bool standardize, bool compute_ELBO, bool update_V, + const arma::vec& update_order, double eps, unsigned int nthreads) { + unsigned int r = mu1.n_cols; + unsigned int p = mu1.n_rows; + unsigned int k = w0.n_elem; + cube S1(r,r,p); + mat w1(p,k); + mat mu1_new = mu1; + mat XtXmu1_new = XtXmu1; + double var_part_tr_wERSS; + double neg_KL; + mat var_part_ERSS(r,r); + mr_mash_precomputed_quantities precomp_quants + (as(precomp_quants_list["S"]), + as(precomp_quants_list["S_chol"]), + as(precomp_quants_list["S1"]), + as(precomp_quants_list["SplusS0_chol"]), + as(precomp_quants_list["V_chol"]), + as(precomp_quants_list["d"]), + as(precomp_quants_list["QtimesV_chol"]), + as(precomp_quants_list["xtx"])); + inner_loop_general_rss(n, XtY, XtXmu1_new, mu1_new, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, eps, + nthreads, S1, w1, var_part_tr_wERSS, neg_KL, var_part_ERSS); + return List::create(Named("mu1") = mu1_new, + Named("S1") = S1, + Named("w1") = w1, + Named("var_part_tr_wERSS") = var_part_tr_wERSS, + Named("neg_KL") = neg_KL, + 
Named("var_part_ERSS") = var_part_ERSS); +} + +// Perform the inner loop +void inner_loop_general_rss (unsigned int n, mat& XtY, mat& XtXmu1, mat& mu1, const mat& V, + const mat& Vinv, const vec& w0, const cube& S0, + const mr_mash_precomputed_quantities& precomp_quants, + bool standardize, bool compute_ELBO, bool update_V, + const vec& update_order, double eps, unsigned int nthreads, + cube& S1, mat& w1, double& var_part_tr_wERSS, + double& neg_KL, mat& var_part_ERSS) { + unsigned int p = mu1.n_rows; + unsigned int r = mu1.n_cols; + unsigned int k = w0.n_elem; + vec x(n); + mat XtRbar_j(p,r); + vec mu1_j(r); + vec mu1_mix(r); + mat S1_mix(r,r); + vec w1_mix(k); + double logbf_mix; + double xtx; + + // Initialize ELBO parameters + var_part_tr_wERSS = 0; + neg_KL = 0; + + // Initialize V parameters + var_part_ERSS.zeros(r,r); + + // Repeat for each predictor. + for (unsigned int j : update_order) { + + if (standardize) + double xtx_j = n-1; + else + double xtx_j = precomp_quants(j); + + mu1_j = trans(mu1.row(j)); + XtY_j = trans(XtY.row(j)); + XtXmu1_j = trans(XtXmu1.row(j)); + + // Disregard the ith predictor in the expected residuals. + XtRbar_j = XtY_j - XtXmu1_j + xtx * mu1_j; + + // Update the posterior quantities for the jth + // predictor. + if (standardize) + logbf_mix = bayes_mvr_mix_standardized_X_rss(n, XtRbar_j, w0, S0, precomp_quants.S, + precomp_quants.S1, + precomp_quants.SplusS0_chol, + precomp_quants.S_chol, eps, nthreads, + mu1_mix, S1_mix, w1_mix); + else + logbf_mix = bayes_mvr_mix_centered_X_rss(XtRbar_j, V, w0, S0, xtx_j, Vinv, + precomp_quants.V_chol, precomp_quants.d, + precomp_quants.QtimesV_chol, eps, nthreads, + mu1_mix, S1_mix, w1_mix); + + mu1.row(j) = trans(mu1_mix); + S1.slice(j) = S1_mix; + w1.row(j) = trans(w1_mix); + + // Compute ELBO parameters + if (compute_ELBO) + compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, x, Rbar_j, logbf_mix, mu1_mix, S1_mix, xtx_j, Vinv); + + // Compute V parameters + if (update_V) + compute_var_part_ERSS(var_part_ERSS, S1_mix, xtx_j); + } +} From 258e9b3f4e61e57f7f8f35fea63b7fc8ddb49788 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Tue, 14 Mar 2023 17:16:47 -0400 Subject: [PATCH 031/103] Fix bugs -- ELBO still wrong --- R/mr_mash_rss.R | 2 +- R/mr_mash_rss_updates.R | 4 +- src/misc.cpp | 12 +-- src/misc.h | 2 +- src/mr_mash_rss_updates.cpp | 158 ------------------------------------ src/mr_mash_updates.cpp | 124 ++++++++++++++++++++++++++-- 6 files changed, 129 insertions(+), 173 deletions(-) delete mode 100644 src/mr_mash_rss_updates.cpp diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index a3bfca9..fbf1943 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -177,7 +177,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le mu1_init, tol=1e-4, convergence_criterion=c("mu1", "ELBO"), max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, - update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("R", "Rcpp"), e=1e-8, + update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("Rcpp", "R"), e=1e-8, ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF"), nthreads=as.integer(NA)) { diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index acd901d..94400c4 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -71,11 +71,11 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no #' @importFrom RcppParallel RcppParallelLibs #' @useDynLib 
mr.mash.alpha #' -inner_loop_general_rss_Rcpp <- function(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, +inner_loop_general_rss_Rcpp <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads){ - out <- inner_loop_general_rss_rcpp(n, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_rss_rcpp(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads) diff --git a/src/misc.cpp b/src/misc.cpp index 3fb851b..7cdd0ab 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -89,26 +89,28 @@ void compute_var_part_ERSS (mat& var_part_ERSS, const mat& S1, double xtx){ // } // Function to compute some terms of the ELBO with summary data -void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, const mat& XtRbar_j, +void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, const vec& XtRbar_j, double logbf, const mat& mu1, const mat& S1, double xtx, const mat& Vinv){ var_part_tr_wERSS += (as_scalar(accu(Vinv % S1))*xtx); - neg_KL += (logbf + as_scalar(0.5*accu(Vinv % (-mu1 * XtRbar_j - XtRbar_j * trans(mu1) + mu1 * trans(mu1) * xtx + S1*xtx)))); + mat Cm = (- mu1 * XtRbar_j - XtRbar_j * trans(mu1) + mu1 * trans(mu1) * xtx + S1*xtx); + + neg_KL += (logbf + as_scalar(0.5*accu(Vinv % Cm))); } // // [[Rcpp::plugins("cpp11")]] // // [[Rcpp::depends(RcppArmadillo)]] // // [[Rcpp::export]] -// List compute_ELBO_terms_rcpp (double var_part_tr_wERSS_init, double neg_KL_init, double x_j, -// const mat& rbar_j, double logbf, const mat& mu1, const mat& S1, +// List compute_ELBO_rss_terms_rcpp (double var_part_tr_wERSS_init, double neg_KL_init, +// const vec& xtRbar_j, double logbf, const mat& mu1, const mat& S1, // double xtx, const mat& Vinv) { // // double var_part_tr_wERSS = var_part_tr_wERSS_init; // double neg_KL = neg_KL_init; // -// compute_ELBO_terms(var_part_tr_wERSS, neg_KL, x_j, rbar_j, logbf, mu1, S1, xtx, Vinv); +// compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, x_j, rbar_j, logbf, mu1, S1, xtx, Vinv); // // return List::create(Named("var_part_tr_wERSS") = var_part_tr_wERSS, // Named("neg_KL") = neg_KL); diff --git a/src/misc.h b/src/misc.h index 953df43..557ca2c 100644 --- a/src/misc.h +++ b/src/misc.h @@ -32,7 +32,7 @@ void compute_ELBO_terms (double& var_part_tr_wERSS, double& neg_KL, const arma:: double xtx, const arma::mat& Vinv); void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, - const arma::mat& XtRbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, + const arma::vec& XtRbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, double xtx, const arma::mat& Vinv); void compute_var_part_ERSS (arma::mat& var_part_ERSS, const arma::mat& S1, double xtx); diff --git a/src/mr_mash_rss_updates.cpp b/src/mr_mash_rss_updates.cpp deleted file mode 100644 index 0752d7b..0000000 --- a/src/mr_mash_rss_updates.cpp +++ /dev/null @@ -1,158 +0,0 @@ -#include "bayes_reg_mv.h" -#include "misc.h" -#include -#include - -using namespace Rcpp; -using namespace arma; - -// TYPE DEFINITIONS -// ---------------- - -// A list of precomputed quantities that are invariant to any updates -// to the mr-mash model parameters. -struct mr_mash_precomputed_quantities { - const mat S; - const mat S_chol; - const cube S1; - const cube SplusS0_chol; - const mat V_chol; - const mat d; - const cube QtimesV_chol; - const vec xtx; - - // This is used to create a mr_mash_precomputed_quantities object. 
- mr_mash_precomputed_quantities (const mat& S, const mat& S_chol, - const cube& S1, const cube& SplusS0_chol, - const mat& V_chol, const mat& d, const cube& QtimesV_chol, - const vec& xtx) : - S(S), S_chol(S_chol), S1(S1), SplusS0_chol(SplusS0_chol), - V_chol(V_chol), d(d), QtimesV_chol(QtimesV_chol), xtx(xtx) { }; -}; - - -// FUNCTION DECLARATIONS -// --------------------- - -// Inner loop -void inner_loop_general_rss (unsigned int n, const mat& XtY, mat& XtXmu1, mat& mu1, const mat& V, - const mat& Vinv, const vec& w0, const cube& S0, - const mr_mash_precomputed_quantities& precomp_quants, - bool standardize, bool compute_ELBO, bool update_V, - const vec& update_order, double eps, unsigned int nthreads, - cube& S1, mat& w1, double& var_part_tr_wERSS, - double& neg_KL, mat& var_part_ERSS); - - -// FUNCTION DEFINITIONS -// -------------------- - -// Inner loop -// -// [[Rcpp::depends(RcppArmadillo)]] -// [[Rcpp::depends(RcppParallel)]] -// [[Rcpp::export]] -List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtY, arma::mat& XtXmu1, arma::mat& mu1, - const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, - const arma::cube& S0, const List& precomp_quants_list, - bool standardize, bool compute_ELBO, bool update_V, - const arma::vec& update_order, double eps, unsigned int nthreads) { - unsigned int r = mu1.n_cols; - unsigned int p = mu1.n_rows; - unsigned int k = w0.n_elem; - cube S1(r,r,p); - mat w1(p,k); - mat mu1_new = mu1; - mat XtXmu1_new = XtXmu1; - double var_part_tr_wERSS; - double neg_KL; - mat var_part_ERSS(r,r); - mr_mash_precomputed_quantities precomp_quants - (as(precomp_quants_list["S"]), - as(precomp_quants_list["S_chol"]), - as(precomp_quants_list["S1"]), - as(precomp_quants_list["SplusS0_chol"]), - as(precomp_quants_list["V_chol"]), - as(precomp_quants_list["d"]), - as(precomp_quants_list["QtimesV_chol"]), - as(precomp_quants_list["xtx"])); - inner_loop_general_rss(n, XtY, XtXmu1_new, mu1_new, V, Vinv, w0, S0, precomp_quants, - standardize, compute_ELBO, update_V, update_order, eps, - nthreads, S1, w1, var_part_tr_wERSS, neg_KL, var_part_ERSS); - return List::create(Named("mu1") = mu1_new, - Named("S1") = S1, - Named("w1") = w1, - Named("var_part_tr_wERSS") = var_part_tr_wERSS, - Named("neg_KL") = neg_KL, - Named("var_part_ERSS") = var_part_ERSS); -} - -// Perform the inner loop -void inner_loop_general_rss (unsigned int n, mat& XtY, mat& XtXmu1, mat& mu1, const mat& V, - const mat& Vinv, const vec& w0, const cube& S0, - const mr_mash_precomputed_quantities& precomp_quants, - bool standardize, bool compute_ELBO, bool update_V, - const vec& update_order, double eps, unsigned int nthreads, - cube& S1, mat& w1, double& var_part_tr_wERSS, - double& neg_KL, mat& var_part_ERSS) { - unsigned int p = mu1.n_rows; - unsigned int r = mu1.n_cols; - unsigned int k = w0.n_elem; - vec x(n); - mat XtRbar_j(p,r); - vec mu1_j(r); - vec mu1_mix(r); - mat S1_mix(r,r); - vec w1_mix(k); - double logbf_mix; - double xtx; - - // Initialize ELBO parameters - var_part_tr_wERSS = 0; - neg_KL = 0; - - // Initialize V parameters - var_part_ERSS.zeros(r,r); - - // Repeat for each predictor. - for (unsigned int j : update_order) { - - if (standardize) - double xtx_j = n-1; - else - double xtx_j = precomp_quants(j); - - mu1_j = trans(mu1.row(j)); - XtY_j = trans(XtY.row(j)); - XtXmu1_j = trans(XtXmu1.row(j)); - - // Disregard the ith predictor in the expected residuals. - XtRbar_j = XtY_j - XtXmu1_j + xtx * mu1_j; - - // Update the posterior quantities for the jth - // predictor. 
- if (standardize) - logbf_mix = bayes_mvr_mix_standardized_X_rss(n, XtRbar_j, w0, S0, precomp_quants.S, - precomp_quants.S1, - precomp_quants.SplusS0_chol, - precomp_quants.S_chol, eps, nthreads, - mu1_mix, S1_mix, w1_mix); - else - logbf_mix = bayes_mvr_mix_centered_X_rss(XtRbar_j, V, w0, S0, xtx_j, Vinv, - precomp_quants.V_chol, precomp_quants.d, - precomp_quants.QtimesV_chol, eps, nthreads, - mu1_mix, S1_mix, w1_mix); - - mu1.row(j) = trans(mu1_mix); - S1.slice(j) = S1_mix; - w1.row(j) = trans(w1_mix); - - // Compute ELBO parameters - if (compute_ELBO) - compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, x, Rbar_j, logbf_mix, mu1_mix, S1_mix, xtx_j, Vinv); - - // Compute V parameters - if (update_V) - compute_var_part_ERSS(var_part_ERSS, S1_mix, xtx_j); - } -} diff --git a/src/mr_mash_updates.cpp b/src/mr_mash_updates.cpp index 711f4fe..3244d2d 100644 --- a/src/mr_mash_updates.cpp +++ b/src/mr_mash_updates.cpp @@ -43,12 +43,20 @@ void inner_loop_general (const mat& X, mat& Rbar, mat& mu1, const mat& V, cube& S1, mat& w1, double& var_part_tr_wERSS, double& neg_KL, mat& var_part_ERSS); - // Missing Y imputation void impute_missing_Y (mat& Y, const mat& mu, const mat& Vinv, const mat& miss, const mat& non_miss, mat& Y_cov, double& sum_neg_ent_Y_miss); +// Inner loop rss +void inner_loop_general_rss (unsigned int n, const mat& XtY, const mat& XtXmu1, mat& mu1, const mat& V, + const mat& Vinv, const vec& w0, const cube& S0, + const mr_mash_precomputed_quantities& precomp_quants, + bool standardize, bool compute_ELBO, bool update_V, + const vec& update_order, double eps, unsigned int nthreads, + cube& S1, mat& w1, double& var_part_tr_wERSS, + double& neg_KL, mat& var_part_ERSS); + // FUNCTION DEFINITIONS // -------------------- @@ -175,11 +183,6 @@ void inner_loop_general (const mat& X, mat& Rbar, mat& mu1, const mat& V, } } - - - - - // Impute missing Y // // [[Rcpp::depends(RcppArmadillo)]] @@ -241,3 +244,112 @@ void impute_missing_Y (mat& Y, const mat& mu, const mat& Vinv, } } } + +// Inner loop rss +// +// [[Rcpp::depends(RcppArmadillo)]] +// [[Rcpp::depends(RcppParallel)]] +// [[Rcpp::export]] +List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtY, const arma::mat& XtXmu1, arma::mat& mu1, + const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, + const arma::cube& S0, const List& precomp_quants_list, + bool standardize, bool compute_ELBO, bool update_V, + const arma::vec& update_order, double eps, unsigned int nthreads) { + unsigned int r = mu1.n_cols; + unsigned int p = mu1.n_rows; + unsigned int k = w0.n_elem; + cube S1(r,r,p); + mat w1(p,k); + mat mu1_new = mu1; + double var_part_tr_wERSS; + double neg_KL; + mat var_part_ERSS(r,r); + mr_mash_precomputed_quantities precomp_quants + (as(precomp_quants_list["S"]), + as(precomp_quants_list["S_chol"]), + as(precomp_quants_list["S1"]), + as(precomp_quants_list["SplusS0_chol"]), + as(precomp_quants_list["V_chol"]), + as(precomp_quants_list["d"]), + as(precomp_quants_list["QtimesV_chol"]), + as(precomp_quants_list["xtx"])); + inner_loop_general_rss(n, XtY, XtXmu1, mu1_new, V, Vinv, w0, S0, precomp_quants, + standardize, compute_ELBO, update_V, update_order, eps, + nthreads, S1, w1, var_part_tr_wERSS, neg_KL, var_part_ERSS); + return List::create(Named("mu1") = mu1_new, + Named("S1") = S1, + Named("w1") = w1, + Named("var_part_tr_wERSS") = var_part_tr_wERSS, + Named("neg_KL") = neg_KL, + Named("var_part_ERSS") = var_part_ERSS); +} + +// Perform the inner loop rss +void inner_loop_general_rss (unsigned int n, const 
mat& XtY, const mat& XtXmu1, mat& mu1, const mat& V, + const mat& Vinv, const vec& w0, const cube& S0, + const mr_mash_precomputed_quantities& precomp_quants, + bool standardize, bool compute_ELBO, bool update_V, + const vec& update_order, double eps, unsigned int nthreads, + cube& S1, mat& w1, double& var_part_tr_wERSS, + double& neg_KL, mat& var_part_ERSS) { + unsigned int r = mu1.n_cols; + unsigned int k = w0.n_elem; + vec XtRbar_j(r); + vec mu1_j(r); + vec XtY_j(r); + vec XtXmu1_j(r); + vec mu1_mix(r); + mat S1_mix(r,r); + vec w1_mix(k); + double logbf_mix; + double xtx_j; + + // Initialize ELBO parameters + var_part_tr_wERSS = 0; + neg_KL = 0; + + // Initialize V parameters + var_part_ERSS.zeros(r,r); + + // Repeat for each predictor. + for (unsigned int j : update_order) { + + if (standardize) + xtx_j = n-1; + else + xtx_j = precomp_quants.xtx(j); + + mu1_j = trans(mu1.row(j)); + XtY_j = trans(XtY.row(j)); + XtXmu1_j = trans(XtXmu1.row(j)); + + // Disregard the ith predictor in the expected residuals. + XtRbar_j = XtY_j - XtXmu1_j + xtx_j * mu1_j; + + // Update the posterior quantities for the jth + // predictor. + if (standardize) + logbf_mix = bayes_mvr_mix_standardized_X_rss(n, XtRbar_j, w0, S0, precomp_quants.S, + precomp_quants.S1, + precomp_quants.SplusS0_chol, + precomp_quants.S_chol, eps, nthreads, + mu1_mix, S1_mix, w1_mix); + else + logbf_mix = bayes_mvr_mix_centered_X_rss(XtRbar_j, V, w0, S0, xtx_j, Vinv, + precomp_quants.V_chol, precomp_quants.d, + precomp_quants.QtimesV_chol, eps, nthreads, + mu1_mix, S1_mix, w1_mix); + + mu1.row(j) = trans(mu1_mix); + S1.slice(j) = S1_mix; + w1.row(j) = trans(w1_mix); + + // Compute ELBO parameters + if (compute_ELBO) + compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, XtRbar_j, logbf_mix, mu1_mix, S1_mix, xtx_j, Vinv); + + // Compute V parameters + if (update_V) + compute_var_part_ERSS(var_part_ERSS, S1_mix, xtx_j); + } +} From fdb6c867517c0968a757113c290095c7a0e24584 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Tue, 14 Mar 2023 17:17:34 -0400 Subject: [PATCH 032/103] Run devtools::document() --- R/RcppExports.R | 4 ++++ man/mr.mash.rss.Rd | 4 ++-- src/RcppExports.cpp | 26 ++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index e418778..7c60dc3 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -9,6 +9,10 @@ impute_missing_Y_rcpp <- function(Y, mu, Vinv, miss, non_miss) { .Call('_mr_mash_alpha_impute_missing_Y_rcpp', PACKAGE = 'mr.mash.alpha', Y, mu, Vinv, miss, non_miss) } +inner_loop_general_rss_rcpp <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads) { + .Call('_mr_mash_alpha_inner_loop_general_rss_rcpp', PACKAGE = 'mr.mash.alpha', n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads) +} + scale_rcpp <- function(M, a, b) { .Call('_mr_mash_alpha_scale_rcpp', PACKAGE = 'mr.mash.alpha', M, a, b) } diff --git a/man/mr.mash.rss.Rd b/man/mr.mash.rss.Rd index 6787c13..11e3789 100644 --- a/man/mr.mash.rss.Rd +++ b/man/mr.mash.rss.Rd @@ -15,7 +15,7 @@ mr.mash.rss( S0, w0 = rep(1/(length(S0)), length(S0)), V = NULL, - mu1_init = matrix(0, nrow = nrow(Z), ncol = ncol(Z)), + mu1_init, tol = 1e-04, convergence_criterion = c("mu1", "ELBO"), max_iter = 5000, @@ -27,7 +27,7 @@ mr.mash.rss( verbose = TRUE, update_V = FALSE, update_V_method = c("full", "diagonal"), - version = c("R", "Rcpp"), + version = c("Rcpp", 
"R"), e = 1e-08, ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF"), nthreads = as.integer(NA) diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index a8c4abd..aa63683 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -50,6 +50,31 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// inner_loop_general_rss_rcpp +List inner_loop_general_rss_rcpp(unsigned int n, const arma::mat& XtY, const arma::mat& XtXmu1, arma::mat& mu1, const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, const arma::cube& S0, const List& precomp_quants_list, bool standardize, bool compute_ELBO, bool update_V, const arma::vec& update_order, double eps, unsigned int nthreads); +RcppExport SEXP _mr_mash_alpha_inner_loop_general_rss_rcpp(SEXP nSEXP, SEXP XtYSEXP, SEXP XtXmu1SEXP, SEXP mu1SEXP, SEXP VSEXP, SEXP VinvSEXP, SEXP w0SEXP, SEXP S0SEXP, SEXP precomp_quants_listSEXP, SEXP standardizeSEXP, SEXP compute_ELBOSEXP, SEXP update_VSEXP, SEXP update_orderSEXP, SEXP epsSEXP, SEXP nthreadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< unsigned int >::type n(nSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type XtY(XtYSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type XtXmu1(XtXmu1SEXP); + Rcpp::traits::input_parameter< arma::mat& >::type mu1(mu1SEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type V(VSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Vinv(VinvSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type w0(w0SEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type S0(S0SEXP); + Rcpp::traits::input_parameter< const List& >::type precomp_quants_list(precomp_quants_listSEXP); + Rcpp::traits::input_parameter< bool >::type standardize(standardizeSEXP); + Rcpp::traits::input_parameter< bool >::type compute_ELBO(compute_ELBOSEXP); + Rcpp::traits::input_parameter< bool >::type update_V(update_VSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type update_order(update_orderSEXP); + Rcpp::traits::input_parameter< double >::type eps(epsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type nthreads(nthreadsSEXP); + rcpp_result_gen = Rcpp::wrap(inner_loop_general_rss_rcpp(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads)); + return rcpp_result_gen; +END_RCPP +} // scale_rcpp arma::mat scale_rcpp(const arma::mat& M, const arma::vec& a, const arma::vec& b); RcppExport SEXP _mr_mash_alpha_scale_rcpp(SEXP MSEXP, SEXP aSEXP, SEXP bSEXP) { @@ -113,6 +138,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_mr_mash_alpha_inner_loop_general_rcpp", (DL_FUNC) &_mr_mash_alpha_inner_loop_general_rcpp, 14}, {"_mr_mash_alpha_impute_missing_Y_rcpp", (DL_FUNC) &_mr_mash_alpha_impute_missing_Y_rcpp, 5}, + {"_mr_mash_alpha_inner_loop_general_rss_rcpp", (DL_FUNC) &_mr_mash_alpha_inner_loop_general_rss_rcpp, 15}, {"_mr_mash_alpha_scale_rcpp", (DL_FUNC) &_mr_mash_alpha_scale_rcpp, 3}, {"_mr_mash_alpha_scale2_rcpp", (DL_FUNC) &_mr_mash_alpha_scale2_rcpp, 3}, {"_mr_mash_alpha_rescale_post_mean_covar_rcpp", (DL_FUNC) &_mr_mash_alpha_rescale_post_mean_covar_rcpp, 3}, From d8e70882625387f1165b533db043f971c978c430 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 15 Mar 2023 14:37:21 -0400 Subject: [PATCH 033/103] Fix issue with ELBO rss computation --- src/misc.cpp | 24 ++++++++++++------------ src/misc.h | 2 +- 2 files changed, 13 insertions(+), 13 
deletions(-) diff --git a/src/misc.cpp b/src/misc.cpp index 7cdd0ab..310078f 100644 --- a/src/misc.cpp +++ b/src/misc.cpp @@ -89,29 +89,29 @@ void compute_var_part_ERSS (mat& var_part_ERSS, const mat& S1, double xtx){ // } // Function to compute some terms of the ELBO with summary data -void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, const vec& XtRbar_j, +void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, const mat& XtRbar_j, double logbf, const mat& mu1, const mat& S1, double xtx, const mat& Vinv){ var_part_tr_wERSS += (as_scalar(accu(Vinv % S1))*xtx); - mat Cm = (- mu1 * XtRbar_j - XtRbar_j * trans(mu1) + mu1 * trans(mu1) * xtx + S1*xtx); + mat mu1XtRbar_j = mu1 * trans(XtRbar_j); - neg_KL += (logbf + as_scalar(0.5*accu(Vinv % Cm))); + neg_KL += (logbf + as_scalar(0.5*accu(Vinv % (- mu1XtRbar_j - trans(mu1XtRbar_j) + mu1 * trans(mu1) * xtx + S1*xtx)))); } // // [[Rcpp::plugins("cpp11")]] // // [[Rcpp::depends(RcppArmadillo)]] // // [[Rcpp::export]] -// List compute_ELBO_rss_terms_rcpp (double var_part_tr_wERSS_init, double neg_KL_init, -// const vec& xtRbar_j, double logbf, const mat& mu1, const mat& S1, -// double xtx, const mat& Vinv) { -// +// Rcpp::List compute_ELBO_rss_terms_rcpp (double var_part_tr_wERSS_init, double neg_KL_init, +// const arma::mat& xtRbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, +// double xtx, const arma::mat& Vinv) { +// // double var_part_tr_wERSS = var_part_tr_wERSS_init; // double neg_KL = neg_KL_init; -// -// compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, x_j, rbar_j, logbf, mu1, S1, xtx, Vinv); -// -// return List::create(Named("var_part_tr_wERSS") = var_part_tr_wERSS, -// Named("neg_KL") = neg_KL); +// +// compute_ELBO_rss_terms(var_part_tr_wERSS, neg_KL, xtRbar_j, logbf, mu1, S1, xtx, Vinv); +// +// return Rcpp::List::create(Rcpp::Named("var_part_tr_wERSS") = var_part_tr_wERSS, +// Rcpp::Named("neg_KL") = neg_KL); // } diff --git a/src/misc.h b/src/misc.h index 557ca2c..953df43 100644 --- a/src/misc.h +++ b/src/misc.h @@ -32,7 +32,7 @@ void compute_ELBO_terms (double& var_part_tr_wERSS, double& neg_KL, const arma:: double xtx, const arma::mat& Vinv); void compute_ELBO_rss_terms (double& var_part_tr_wERSS, double& neg_KL, - const arma::vec& XtRbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, + const arma::mat& XtRbar_j, double logbf, const arma::mat& mu1, const arma::mat& S1, double xtx, const arma::mat& Vinv); void compute_var_part_ERSS (arma::mat& var_part_ERSS, const arma::mat& S1, double xtx); From 2998424c8f9d4c84c3b7202ed133b8cd398bad3e Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 15 Mar 2023 14:38:00 -0400 Subject: [PATCH 034/103] Simplify computation --- R/elbo_rss.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/elbo_rss.R b/R/elbo_rss.R index 2bfcd8f..d12e19a 100644 --- a/R/elbo_rss.R +++ b/R/elbo_rss.R @@ -15,7 +15,9 @@ compute_ELBO_rss_terms <- function(var_part_tr_wERSS, neg_KL, xtRbar_j, bfit, xt var_part_tr_wERSS <- var_part_tr_wERSS + (sum(Vinv*bfit$S1)*xtx) - Cm <- -mu1_mat%*%xtRbar_j - tcrossprod(xtRbar_j, mu1_mat) + tcrossprod(mu1_mat)*xtx + bfit$S1*xtx + mu1xtRbar_j <- mu1_mat%*%xtRbar_j + + Cm <- - mu1xtRbar_j - t(mu1xtRbar_j) + tcrossprod(mu1_mat)*xtx + bfit$S1*xtx neg_KL <- neg_KL + (bfit$logbf + 0.5*(sum(Vinv*Cm))) From 3969ca292cfca4c1f65246faf28c4eb0bb8e9611 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 15 Mar 2023 14:39:00 -0400 Subject: [PATCH 035/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2d6c558..bd85f6e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.2-28 -Date: 2023-03-09 +Version: 0.3-1 +Date: 2023-03-15 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. From dc313be6ff30afd06120d7e181470d947c942b0f Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:36:33 -0400 Subject: [PATCH 036/103] Add utility function --- R/misc.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/misc.R b/R/misc.R index eac1f9c..1bb098a 100644 --- a/R/misc.R +++ b/R/misc.R @@ -338,3 +338,13 @@ extract_missing_Y_pattern <- function(Y){ return(list(miss=miss, non_miss=non_miss)) } +###Check whether a matrix is PSD +check_semi_pd <- function (A, tol) { + attr(A,"eigen") <- eigen(A,symmetric = TRUE) + v <- attr(A,"eigen")$values + v[abs(v) < tol] = 0 + return(list(matrix = A, + status = !any(v < 0), + eigenvalues = v)) +} + From 1e731b041aa9c44e1410fa6846558e183d0d17aa Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:37:02 -0400 Subject: [PATCH 037/103] Add input checks and deal with intercept --- R/mr_mash_rss.R | 150 ++++++++++++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 70 deletions(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index fbf1943..4a94be9 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -74,8 +74,15 @@ #' prior matrices to improve numerical stability of the updates. #' #' @param ca_update_order The order with which coordinates are -#' updated. So far, "consecutive", "decreasing_logBF", -#' "increasing_logBF" are supported. +#' updated. So far, "consecutive" is supported. +#' +#' @param X_colmeans a p-vector of variable means. +#' +#' @param Y_colmeans a r-vector of response means. +#' +#' @param check_R If \code{TRUE}, R is checked to be positive semidefinite. +#' +#' @param R_tol tolerance to declare positive semi-definiteness of R. #' #' @param nthreads Number of RcppParallel threads to use for the #' updates. When \code{nthreads} is \code{NA}, the default number of @@ -83,7 +90,7 @@ #' \code{\link[RcppParallel]{defaultNumThreads}}. This setting is #' ignored when \code{version = "R"}. #' -#' @return A mr.mash fit, stored as a list with some or all of the +#' @return A mr.mash.rss fit, stored as a list with some or all of the #' following elements: #' #' \item{mu1}{p x r matrix of posterior means for the regression @@ -104,13 +111,7 @@ #' on the regression coefficients}. #' #' \item{intercept}{r-vector containing posterior mean estimate of the -#' intercept.} -#' -#' \item{fitted}{n x r matrix of fitted values.} -#' -#' \item{G}{r x r covariance matrix of fitted values.} -#' -#' \item{pve}{r-vector of proportion of variance explained by the covariates.} +#' intercept, if \code{X_colmeans} and \code{Y_colmeans} are provided.} #' #' \item{ELBO}{Evidence Lower Bound (ELBO) at last iteration.} #' @@ -154,12 +155,11 @@ #' S0 <- expand_covs(S0, grid, zeromat=TRUE) #' #' ###Fit mr.mash -#' fit <- mr.mash.rss(Xtrain, Ytrain, S0, update_V=TRUE) -#' -#' # Compare the "fitted" values of Y against the true Y in the training set. 
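# A quick illustration of the check_semi_pd() helper added above (toy matrices,
# not package code): eigenvalues within `tol` of zero are truncated to zero, and
# `status` is TRUE only when no negative eigenvalue remains after that truncation.
check_semi_pd(diag(2), tol = 1e-8)$status                      # TRUE
check_semi_pd(matrix(c(1, 2, 2, 1), 2, 2), tol = 1e-8)$status  # FALSE (eigenvalues 3 and -1)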
-#' plot(fit$fitted,Ytrain,pch = 20,col = "darkblue",xlab = "true", -#' ylab = "fitted") -#' abline(a = 0,b = 1,col = "magenta",lty = "dotted") +#' covY <- cov(Ytrain) +#' corX <- cor(Xtrain) +#' n_train <- nrow(Ytrain) +#' fit <- mr.mash.rss(Bhat=univ_sumstats$Bhat, Shat=univ_sumstats$Shat, S0=S0, +#' covY=covY, R=corX, n=n_train, V=covY, update_V=TRUE) #' #' # Predict the multivariate outcomes in the test set using the fitted model. #' Ytest_est <- predict(fit,Xtest) @@ -168,17 +168,19 @@ #' abline(a = 0,b = 1,col = "magenta",lty = "dotted") #' #' @importFrom stats cov +#' @importFrom Rfast is.symmetric #' @importFrom RcppParallel defaultNumThreads #' @importFrom RcppParallel setThreadOptions #' #' @export #' mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, - mu1_init, tol=1e-4, convergence_criterion=c("mu1", "ELBO"), + mu1_init=NULL, tol=1e-4, convergence_criterion=c("mu1", "ELBO"), max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("Rcpp", "R"), e=1e-8, ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF"), + X_colmeans=NULL, Y_colmeans=NULL, check_R=TRUE, R_tol=1e-08, nthreads=as.integer(NA)) { if(verbose){ @@ -219,16 +221,35 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le } ###Check that the inputs are in the correct format - # if(!is.matrix(Y)) - # stop("Y must be a matrix.") - # if(!is.matrix(X)) - # stop("X must be a matrix.") - # if(any(is.na(X))) - # stop("X must not contain missing values.") + if (sum(c(missing(Z), missing(Bhat) || missing(Shat))) != 1) + stop("Please provide either Z or (Bhat, Shat), but not both") + + if(missing(Z)){ + if(!is.matrix(Bhat)) + stop("Bhat must be a matrix.") + if(!is.matrix(Shat)) + stop("Shat must be a matrix.") + if(any(is.na(Bhat)) || any(is.na(Shat))) + stop("Bhat, Shat must not contain missing values.") + if(any(Shat <= 0)) + stop("Shat cannot have zero or negative elements.") + } else { + if(!is.matrix(Z)) + stop("Z must be a matrix.") + if(any(is.na(Z))) + stop("Z must not contain missing values.") + } + if(!is.null(V)){ - if(!is.matrix(V) || !isSymmetric(V)) + if(!is.matrix(V) || !is.symmetric(V)) stop("V must be a symmetric matrix.") } + if(!missing(covY)){ + if(!is.matrix(covY) || !is.symmetric(covY)) + stop("covY must be a symmetric matrix.") + } + if(!is.matrix(R) || !is.symmetric(R)) + stop("R must be a symmetric matrix.") if(!is.list(S0)) stop("S0 must be a list.") if(!is.vector(w0)) @@ -246,43 +267,38 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le if(ca_update_order!="consecutive") stop("ca_update_order=\"consecutive\" is the only option with summary data for now.") - ###Obtain dimensions needed from inputs - if(!missing(Bhat)){ - p <- nrow(Bhat) - r <- ncol(Bhat) - } else if(!missing(Z)){ - p <- nrow(Z) - r <- ncol(Z) - } else{ - stop("Z or Bhat should be provided.") - } - - K <- length(S0) - # PRE-PROCESSING STEPS # -------------------- - ##Compute Z scores + ###Compute Z scores if(missing(Z)){ Z <- Bhat/Shat } - ##Compute pve-adjusted Z scores, if n is provided + Z[is.na(Z)] <- 0 + + ###Compute pve-adjusted Z scores, if n is provided if(!missing(n)) { adj <- (n-1)/(Z^2 + n - 2) Z <- sqrt(adj) * Z } - ##If covariance of Y and standard errors are provided, - ##the effects are on the *original scale*. 
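# Toy illustration of the pve adjustment applied above (not package code): a
# marginal z score is shrunk by sqrt((n-1)/(z^2 + n - 2)), only slightly for
# large n and more strongly for small n.
z_toy <- 5; n_toy <- 1000
sqrt((n_toy - 1)/(z_toy^2 + n_toy - 2)) * z_toy   # ~4.94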
- if(!missing(covY) & !missing(Shat)){ + ###Obtain dimensions and store dimensions names of the inputs + p <- nrow(Z) + r <- ncol(Z) + K <- length(S0) + Z_colnames <- colnames(Z) + Z_rownames <- rownames(Z) + + ###If covariance of Y and standard errors are provided, + ###the effects are on the *original scale*. + if(!missing(Shat) & !missing(covY)){ XtXdiag <- rowMeans(matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) * adj/(Shat^2)) XtX <- t(R * sqrt(XtXdiag)) * sqrt(XtXdiag) XtX <- (XtX + t(XtX))/2 XtY <- Z * sqrt(adj) * matrix(diag(covY), nrow=p, ncol=r, byrow=TRUE) / Shat - } else { - ##The effects are on the *standardized* X, y scale. + ###The effects are on the *standardized* X, y scale. XtX <- R*(n-1) XtY <- Z*sqrt(n-1) covY <- cov2cor(V) @@ -290,6 +306,14 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le YtY <- covY*(n-1) + ###Check whether XtX is positive semidefinite + if(check_R){ + semi_pd <- check_semi_pd(XtX, R_tol) + if (!semi_pd$status) + stop("XtX is not a positive semidefinite matrix") + } + + ###Adjust XtX and XtY if X is standardized if(standardize){ dXtX <- diag(XtX) sx <- sqrt(dXtX/(n-1)) @@ -298,16 +322,12 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le XtY <- XtY / sx } - ###Store dimensions names of the inputs - Z_colnames <- colnames(Z) - Z_rownames <- rownames(Z) - ###Add number to diagonal elements of the prior matrices (improves ###numerical stability) S0 <- lapply(S0, makePD, e=e) - ##Initialize regression coefficients to 0 if not provided - if(missing(mu1_init)){ + ###Initialize regression coefficients to 0 if not provided + if(is.null(mu1_init)){ mu1_init <- matrix(0, nrow=p, ncol=r) } @@ -521,14 +541,6 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le # POST-PROCESSING STEPS # -------------------- - ###Compute the "fitted" values. -# fitted_vals <- addtocols(X %*% mu1_t, muy) - - ###Compute covariance of fitted values and PVE -# cov_fitted <- cov(fitted_vals) -# var_fitted <- diag(cov_fitted) -# pve <- var_fitted/(var_fitted+diag(V)) - if(standardize){ ###Rescale posterior means and covariance of coefficients. In the ###context of predicting Y, this rescaling is equivalent to @@ -543,7 +555,10 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ###with respect to the *rescaled* coefficients to recover the ###correct fitted values. This is why this is done after rescaling ###the coefficients above. -# intercept <- drop(muy - mux %*% mu1_t) + if(!is.null(X_colmeans) & !is.null(Y_colmeans)){ + intercept <- drop(Y_colmeans - X_colmeans %*% mu1_t) + names(intercept) <- Z_colnames + } ###Assign names to outputs dimensions S0_names <- names(S0) @@ -558,13 +573,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le names(w0) <- S0_names rownames(V) <- Z_colnames colnames(V) <- Z_colnames - # rownames(fitted_vals) <- Y_rownames - # colnames(fitted_vals) <- Y_colnames - # rownames(cov_fitted) <- Y_colnames - # colnames(cov_fitted) <- Y_colnames - # names(pve) <- Y_colnames - # names(intercept) <- Y_colnames - + ###Remove unused rows of progress progress <- progress[rowSums(is.na(progress)) != ncol(progress), ] @@ -577,18 +586,19 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ###the Evidence Lower Bound (ELBO; if computed) and imputed responses (Y; if ###missing values were present). 
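# Sketch of downstream use of the object assembled below, assuming a fit called
# `fit` and a matrix of new predictors `Xnew` (see the predict.mr.mash.rss method
# added later in this patch series): predictions are Xnew %*% fit$mu1, with
# fit$intercept[j] added to the j-th column when X_colmeans and Y_colmeans were
# supplied, e.g. sweep(Xnew %*% fit$mu1, 2, fit$intercept, "+").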
out <- list(mu1=mu1_t, S1=S1_t, w1=w1_t, V=V, w0=w0, S0=simplify2array_custom(S0), - progress=progress, #intercept=intercept, fitted=fitted_vals, G=cov_fitted, pve=pve, - converged=converged) + intercept=NA, progress=progress, converged=converged) if(compute_ELBO) ###Append ELBO to the output out$ELBO <- ELBO + if(!is.null(X_colmeans) & !is.null(Y_colmeans)) + out$intercept <- intercept - class(out) <- c("mr.mash", "list") + class(out) <- c("mr.mash.rss", "list") if(verbose){ cat("Done!\n") toc <- Sys.time() - cat("mr.mash successfully executed in", difftime(toc, tic, units="mins"), + cat("mr.mash.rss successfully executed in", difftime(toc, tic, units="mins"), "minutes!\n") } From eb85cf4678f818f60a7c19fcee3daab518056414 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:38:54 -0400 Subject: [PATCH 038/103] Add coeff and predict functions for the mr.mash.rss class --- R/mr_mash_rss_predict.R | 47 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 R/mr_mash_rss_predict.R diff --git a/R/mr_mash_rss_predict.R b/R/mr_mash_rss_predict.R new file mode 100644 index 0000000..b4517ed --- /dev/null +++ b/R/mr_mash_rss_predict.R @@ -0,0 +1,47 @@ +#' @title Predict future observations from mr.mash.rss fit. +#' +#' @param object a mr.mash.rss fit. +#' +#' @param newx a new value for X at which to do predictions. +#' +#' @param \dots Additional arguments (not used). +#' +#' @return Matrix of predicted values. +#' +#' @export +#' @export predict.mr.mash.rss +#' +predict.mr.mash <- function(object, newx, ...){ + if(!is.matrix(newx)) + stop("X must be a matrix.") + if (any(is.na(newx))) + stop("X must not contain missing values.") + + if(!is.na(object$intercept)) + return(with(object,addtocols(newx %*% mu1,intercept))) + else + return(with(object,newx %*% mu1)) +} + +#' @title Extract coefficients from mr.mash.rss fit. +#' +#' @param object a mr.mash fit. +#' +#' @param \dots Other arguments (not used). +#' +#' @return p x r or (p+1) x r matrix of coefficients, +#' depending on whether an intercept was computed. +#' +#' @export +#' @export coef.mr.mash.rss +#' +coef.mr.mash <- function(object, ...){ + if(!is.na(object$intercept)){ + coeffs <- rbind(object$intercept, object$mu1) + rownames(coeffs)[1] <- "(Intercept)" + } else { + coeffs <- object$mu1 + } + + return(coeffs) +} From f5ff3f250022c06060f780b6002e03255a94f587 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:39:26 -0400 Subject: [PATCH 039/103] Add clarification --- R/mr_mash_rss.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 4a94be9..1995f16 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -111,7 +111,8 @@ #' on the regression coefficients}. #' #' \item{intercept}{r-vector containing posterior mean estimate of the -#' intercept, if \code{X_colmeans} and \code{Y_colmeans} are provided.} +#' intercept, if \code{X_colmeans} and \code{Y_colmeans} are provided. 
+#' Otherwise, \code{NA} is output.} #' #' \item{ELBO}{Evidence Lower Bound (ELBO) at last iteration.} #' From b0c9f9beddde77101905331ad50950b1fc5208a4 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:40:35 -0400 Subject: [PATCH 040/103] Add dependency and bump up version --- DESCRIPTION | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bd85f6e..9043434 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-1 -Date: 2023-03-15 +Version: 0.3-2 +Date: 2023-03-16 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. @@ -24,7 +24,8 @@ Imports: mashr (>= 0.2.41), ebnm, flashier, - parallel + parallel, + Rfast Suggests: testthat, varbvs From 2981c394f8bbb18eaaa9f13898de8135fefe54fe Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:42:00 -0400 Subject: [PATCH 041/103] Fix example --- R/mr_mash_rss.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 1995f16..d8730b8 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -160,7 +160,8 @@ #' corX <- cor(Xtrain) #' n_train <- nrow(Ytrain) #' fit <- mr.mash.rss(Bhat=univ_sumstats$Bhat, Shat=univ_sumstats$Shat, S0=S0, -#' covY=covY, R=corX, n=n_train, V=covY, update_V=TRUE) +#' covY=covY, R=corX, n=n_train, V=covY, update_V=TRUE, +#' X_colmeans=colMeans(Xtrain), Y_colmeans=colMeans(Ytrain)) #' #' # Predict the multivariate outcomes in the test set using the fitted model. #' Ytest_est <- predict(fit,Xtest) From d6476711db101a3c7c48913e109ab20e62d9b6e9 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:47:33 -0400 Subject: [PATCH 042/103] Fix names --- R/mr_mash_rss_predict.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mr_mash_rss_predict.R b/R/mr_mash_rss_predict.R index b4517ed..5d96bbd 100644 --- a/R/mr_mash_rss_predict.R +++ b/R/mr_mash_rss_predict.R @@ -11,7 +11,7 @@ #' @export #' @export predict.mr.mash.rss #' -predict.mr.mash <- function(object, newx, ...){ +predict.mr.mash.rss <- function(object, newx, ...){ if(!is.matrix(newx)) stop("X must be a matrix.") if (any(is.na(newx))) @@ -35,7 +35,7 @@ predict.mr.mash <- function(object, newx, ...){ #' @export #' @export coef.mr.mash.rss #' -coef.mr.mash <- function(object, ...){ +coef.mr.mash.rss <- function(object, ...){ if(!is.na(object$intercept)){ coeffs <- rbind(object$intercept, object$mu1) rownames(coeffs)[1] <- "(Intercept)" From 54c8206807365871eb610dbc0e034da229b6c5aa Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:50:31 -0400 Subject: [PATCH 043/103] Run devtools::document() --- NAMESPACE | 5 +++++ man/coef.mr.mash.rss.Rd | 20 +++++++++++++++++++ man/mr.mash.rss.Rd | 40 ++++++++++++++++++++++---------------- man/predict.mr.mash.rss.Rd | 21 ++++++++++++++++++++ 4 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 man/coef.mr.mash.rss.Rd create mode 100644 man/predict.mr.mash.rss.Rd diff --git a/NAMESPACE b/NAMESPACE index a909fd9..fcf5582 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,12 @@ # Generated by roxygen2: do not edit by hand S3method(coef,mr.mash) +S3method(coef,mr.mash.rss) S3method(predict,mr.mash) +S3method(predict,mr.mash.rss) export(autoselect.mixsd) export(coef.mr.mash) +export(coef.mr.mash.rss) export(compute_canonical_covs) 
export(compute_data_driven_covs) export(compute_univariate_sumstats) @@ -11,12 +14,14 @@ export(expand_covs) export(mr.mash) export(mr.mash.rss) export(predict.mr.mash) +export(predict.mr.mash.rss) export(simulate_mr_mash_data) importFrom(MBSP,matrix_normal) importFrom(Rcpp,evalCpp) importFrom(RcppParallel,RcppParallelLibs) importFrom(RcppParallel,defaultNumThreads) importFrom(RcppParallel,setThreadOptions) +importFrom(Rfast,is.symmetric) importFrom(ebnm,ebnm_normal) importFrom(ebnm,ebnm_normal_scale_mixture) importFrom(flashier,flash) diff --git a/man/coef.mr.mash.rss.Rd b/man/coef.mr.mash.rss.Rd new file mode 100644 index 0000000..42f9fee --- /dev/null +++ b/man/coef.mr.mash.rss.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mr_mash_rss_predict.R +\name{coef.mr.mash.rss} +\alias{coef.mr.mash.rss} +\title{Extract coefficients from mr.mash.rss fit.} +\usage{ +\method{coef}{mr.mash.rss}(object, ...) +} +\arguments{ +\item{object}{a mr.mash fit.} + +\item{\dots}{Other arguments (not used).} +} +\value{ +p x r or (p+1) x r matrix of coefficients, + depending on whether an intercept was computed. +} +\description{ +Extract coefficients from mr.mash.rss fit. +} diff --git a/man/mr.mash.rss.Rd b/man/mr.mash.rss.Rd index 11e3789..cae2a75 100644 --- a/man/mr.mash.rss.Rd +++ b/man/mr.mash.rss.Rd @@ -15,7 +15,7 @@ mr.mash.rss( S0, w0 = rep(1/(length(S0)), length(S0)), V = NULL, - mu1_init, + mu1_init = NULL, tol = 1e-04, convergence_criterion = c("mu1", "ELBO"), max_iter = 5000, @@ -30,6 +30,10 @@ mr.mash.rss( version = c("Rcpp", "R"), e = 1e-08, ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF"), + X_colmeans = NULL, + Y_colmeans = NULL, + check_R = TRUE, + R_tol = 1e-08, nthreads = as.integer(NA) ) } @@ -104,8 +108,15 @@ coordinate ascent updates.} prior matrices to improve numerical stability of the updates.} \item{ca_update_order}{The order with which coordinates are -updated. So far, "consecutive", "decreasing_logBF", -"increasing_logBF" are supported.} +updated. So far, "consecutive" is supported.} + +\item{X_colmeans}{a p-vector of variable means.} + +\item{Y_colmeans}{a r-vector of response means.} + +\item{check_R}{If \code{TRUE}, R is checked to be positive semidefinite.} + +\item{R_tol}{tolerance to declare positive semi-definiteness of R.} \item{nthreads}{Number of RcppParallel threads to use for the updates. When \code{nthreads} is \code{NA}, the default number of @@ -114,7 +125,7 @@ threads is used; see ignored when \code{version = "R"}.} } \value{ -A mr.mash fit, stored as a list with some or all of the +A mr.mash.rss fit, stored as a list with some or all of the following elements: \item{mu1}{p x r matrix of posterior means for the regression @@ -135,13 +146,8 @@ following elements: on the regression coefficients}. \item{intercept}{r-vector containing posterior mean estimate of the - intercept.} - -\item{fitted}{n x r matrix of fitted values.} - -\item{G}{r x r covariance matrix of fitted values.} - -\item{pve}{r-vector of proportion of variance explained by the covariates.} + intercept, if \code{X_colmeans} and \code{Y_colmeans} are provided. + Otherwise, \code{NA} is output.} \item{ELBO}{Evidence Lower Bound (ELBO) at last iteration.} @@ -189,12 +195,12 @@ S0 <- compute_canonical_covs(ncol(Ytrain), singletons=TRUE, S0 <- expand_covs(S0, grid, zeromat=TRUE) ###Fit mr.mash -fit <- mr.mash.rss(Xtrain, Ytrain, S0, update_V=TRUE) - -# Compare the "fitted" values of Y against the true Y in the training set. 
-plot(fit$fitted,Ytrain,pch = 20,col = "darkblue",xlab = "true", - ylab = "fitted") -abline(a = 0,b = 1,col = "magenta",lty = "dotted") +covY <- cov(Ytrain) +corX <- cor(Xtrain) +n_train <- nrow(Ytrain) +fit <- mr.mash.rss(Bhat=univ_sumstats$Bhat, Shat=univ_sumstats$Shat, S0=S0, + covY=covY, R=corX, n=n_train, V=covY, update_V=TRUE, + X_colmeans=colMeans(Xtrain), Y_colmeans=colMeans(Ytrain)) # Predict the multivariate outcomes in the test set using the fitted model. Ytest_est <- predict(fit,Xtest) diff --git a/man/predict.mr.mash.rss.Rd b/man/predict.mr.mash.rss.Rd new file mode 100644 index 0000000..193cbc1 --- /dev/null +++ b/man/predict.mr.mash.rss.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mr_mash_rss_predict.R +\name{predict.mr.mash.rss} +\alias{predict.mr.mash.rss} +\title{Predict future observations from mr.mash.rss fit.} +\usage{ +\method{predict}{mr.mash.rss}(object, newx, ...) +} +\arguments{ +\item{object}{a mr.mash.rss fit.} + +\item{newx}{a new value for X at which to do predictions.} + +\item{\dots}{Additional arguments (not used).} +} +\value{ +Matrix of predicted values. +} +\description{ +Predict future observations from mr.mash.rss fit. +} From ce29106fd096199a66c41764e85b2e45debbdc75 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:50:57 -0400 Subject: [PATCH 044/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9043434..5b29f6a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-2 +Version: 0.3-3 Date: 2023-03-16 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From 65a72065495097a2adcc4d5dbcd469a54144f4c4 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 16 Mar 2023 15:55:11 -0400 Subject: [PATCH 045/103] Fix issue --- R/mr_mash_rss_predict.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mr_mash_rss_predict.R b/R/mr_mash_rss_predict.R index 5d96bbd..b5aa144 100644 --- a/R/mr_mash_rss_predict.R +++ b/R/mr_mash_rss_predict.R @@ -17,7 +17,7 @@ predict.mr.mash.rss <- function(object, newx, ...){ if (any(is.na(newx))) stop("X must not contain missing values.") - if(!is.na(object$intercept)) + if(any(!is.na(object$intercept))) return(with(object,addtocols(newx %*% mu1,intercept))) else return(with(object,newx %*% mu1)) @@ -36,7 +36,7 @@ predict.mr.mash.rss <- function(object, newx, ...){ #' @export coef.mr.mash.rss #' coef.mr.mash.rss <- function(object, ...){ - if(!is.na(object$intercept)){ + if(any(!is.na(object$intercept))){ coeffs <- rbind(object$intercept, object$mu1) rownames(coeffs)[1] <- "(Intercept)" } else { From 36cc18591bcc17a43fbc091b2330f82fa6f4b084 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 17 Mar 2023 16:18:51 -0400 Subject: [PATCH 046/103] Fix bug --- .../test_compute_rank_variables_BFmix_R_vs_Rcpp.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R b/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R index e34ecc7..4f584e2 100644 --- a/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R +++ b/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R @@ -38,18 +38,18 @@ test_that("R and Rcpp version of compute_rank_variables_BFmix return the same re eps <- .Machine$double.eps ###Compute logbf 
with standardize=TRUE - comps_r <- precompute_quants(X, V, S0mix, standardize=TRUE, version="R") + comps_r <- precompute_quants(n, V, S0mix, standardize=TRUE, version="R") ranks_r <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0mix, comps_r, standardize=TRUE, version="R", decreasing=TRUE, eps) - comps_rcpp <- precompute_quants(X, V, S0mix, standardize=TRUE, version="Rcpp") + comps_rcpp <- precompute_quants(n, V, S0mix, standardize=TRUE, version="Rcpp") ranks_rcpp <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0mix, comps_rcpp, standardize=TRUE, version="Rcpp", decreasing=TRUE, eps, nthreads=1) ###Compute logbf with standardize=FALSE - comps1_r <- precompute_quants(X, V, S0mix, standardize=FALSE, version="R") + comps1_r <- precompute_quants(n, V, S0mix, standardize=FALSE, version="R") comps1_r$xtx <- colSums(X^2) ranks1_r <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0mix, comps1_r, standardize=FALSE, version="R", decreasing=TRUE, eps) - comps1_rcpp <- precompute_quants(X, V, S0mix, standardize=FALSE, version="Rcpp") + comps1_rcpp <- precompute_quants(n, V, S0mix, standardize=FALSE, version="Rcpp") comps1_rcpp$xtx <- colSums(X^2) ranks1_rcpp <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0mix, comps1_rcpp, standardize=FALSE, version="Rcpp", decreasing=TRUE, eps, nthreads=1) From 774aba493fb9a5f8ad17dde3842be294c5be8248 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Tue, 21 Mar 2023 11:33:59 -0400 Subject: [PATCH 047/103] Add Deborah to license --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index f3cdc0a..d38957b 100644 --- a/LICENSE +++ b/LICENSE @@ -1,2 +1,2 @@ YEAR: 2020 -COPYRIGHT HOLDER: Fabio Morgante, Peter Carbonetto, Matthew Stephens +COPYRIGHT HOLDER: Fabio Morgante, Deborah Kunkel, Peter Carbonetto, Matthew Stephens From 933214688c8db848dfd32a970cbc012f85d5d9bf Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 08:52:04 -0400 Subject: [PATCH 048/103] Improve simulation function --- NAMESPACE | 2 +- R/simulate_demo_data.R | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index fcf5582..a747fe4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ importFrom(RcppParallel,RcppParallelLibs) importFrom(RcppParallel,defaultNumThreads) importFrom(RcppParallel,setThreadOptions) importFrom(Rfast,is.symmetric) +importFrom(Rfast,rmvnorm) importFrom(ebnm,ebnm_normal) importFrom(ebnm,ebnm_normal_scale_mixture) importFrom(flashier,flash) @@ -35,7 +36,6 @@ importFrom(matrixStats,colMeans2) importFrom(matrixStats,colSds) importFrom(matrixStats,colVars) importFrom(mvtnorm,dmvnorm) -importFrom(mvtnorm,rmvnorm) importFrom(parallel,makeCluster) importFrom(parallel,parLapply) importFrom(parallel,stopCluster) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 66f02f0..98ab173 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -58,7 +58,7 @@ #' \item{causal_vars_to_mixture_comps}{p_causal-vector of indexes indicating from which #' mixture components each causal effect comes.} #' -#' @importFrom mvtnorm rmvnorm +#' @importFrom Rfast rmvnorm #' @importFrom MBSP matrix_normal #' @importFrom matrixStats colVars #' @@ -119,10 +119,14 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce B[causal_variables, ] <- B_causal ##Simulate X from N_r(0, Gamma) where Gamma is a given covariance matrix across variables - Gamma_offdiag <- X_scale*X_cor - Gamma <- matrix(Gamma_offdiag, nrow=p, ncol=p) - diag(Gamma) 
<- X_scale - X <- rmvnorm(n=n, mean=rep(0, p), sigma=Gamma) + if(X_cor != 0){ + Gamma_offdiag <- X_scale*X_cor + Gamma <- matrix(Gamma_offdiag, nrow=p, ncol=p) + diag(Gamma) <- X_scale + X <- rmvnorm(n=n, mean=rep(0, p), sigma=Gamma) + } else { + X <- replicate(p, rnorm(n=n, mean=0, sd=sqrt(X_scale))) + } X <- scale_fast2(X, scale=FALSE)$M ##Compute G and its variance From 3e24d3b39be917307058d0f85d2223775ae5f031 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 08:53:19 -0400 Subject: [PATCH 049/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5b29f6a..6ae8647 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-3 +Version: 0.3-4 Date: 2023-03-16 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From 1b6fac64b48b9f37af41e13895874515ed6c15c5 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 10:18:29 -0400 Subject: [PATCH 050/103] Add seed argument --- R/simulate_demo_data.R | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 98ab173..4d412d2 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -32,6 +32,10 @@ #' @param X_scale scalar indicating the diagonal value for Gamma. #' #' @param V_cor scalar indicating the positive correlation [0, 1] between residuals +#' +#' @param seed seed for random number generation used by \code{Rfast::rmvnorm}. +#' However, some computations will also need a general \code{set.seed()} to be +#' reproducible. #' #' @return A list with some or all of the #' following elements: @@ -66,16 +70,17 @@ #' #' #' @examples +#' set.seed(1) #' dat <- simulate_mr_mash_data(n=50, p=40, p_causal=20, r=5, #' r_causal=list(1:2, 3:4), intercepts=rep(1, 5), #' pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1), #' w=c(0.5, 0.5), X_cor=0.5, X_scale=1, -#' V_cor=0) +#' V_cor=0, seed=1) #' #' simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), intercepts=rep(1, r), pve=0.2, B_cor=1, B_scale=1, w=1, - X_cor=0, X_scale=1, V_cor=0){ + X_cor=0, X_scale=1, V_cor=0, seed=NULL){ ##Check that the inputs are correct if(length(intercepts)!=r) stop("intercepts must be of length equal to r.") @@ -87,6 +92,8 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce stop("Elements of w must sum to 1.") if(length(pve)!=1 & length(pve)!=r) stop("pve must be of length equal to 1 or r.") + if(is.null(seed)) + stop("seed argument must be provided.") ##Get number of mixture components K <- length(w) @@ -107,12 +114,12 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce for(j in 1:p_causal){ comp_to_use <- mixcomps[j] r_causal_mix <- r_causal[[comp_to_use]] - B_causal[j, r_causal_mix] <- rmvnorm(n=1, mean=rep(0, length(r_causal_mix)), sigma=Sigma[[comp_to_use]]) + B_causal[j, r_causal_mix] <- rmvnorm(n=1, mean=rep(0, length(r_causal_mix)), sigma=Sigma[[comp_to_use]], seed=seed) } } else { r_causal_length <- length(r_causal[[1]]) r_causal_index <- r_causal[[1]] - B_causal[, r_causal_index] <- rmvnorm(n=p_causal, mean=rep(0, r_causal_length), sigma=Sigma[[1]]) + B_causal[, r_causal_index] <- rmvnorm(n=p_causal, mean=rep(0, r_causal_length), sigma=Sigma[[1]], seed=seed) } B <- matrix(0, ncol=r, nrow=p) causal_variables <- sample(x=(1:p), size=p_causal) @@ -123,7 +130,7 @@ 
simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce Gamma_offdiag <- X_scale*X_cor Gamma <- matrix(Gamma_offdiag, nrow=p, ncol=p) diag(Gamma) <- X_scale - X <- rmvnorm(n=n, mean=rep(0, p), sigma=Gamma) + X <- rmvnorm(n=n, mean=rep(0, p), sigma=Gamma, seed) } else { X <- replicate(p, rnorm(n=n, mean=0, sd=sqrt(X_scale))) } From d265d1b9e768225012566f2bdc7a5268340581e7 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 10:22:10 -0400 Subject: [PATCH 051/103] Run devtools::document() --- man/simulate_mr_mash_data.Rd | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/man/simulate_mr_mash_data.Rd b/man/simulate_mr_mash_data.Rd index 3e8b8e2..b380b0e 100644 --- a/man/simulate_mr_mash_data.Rd +++ b/man/simulate_mr_mash_data.Rd @@ -17,7 +17,8 @@ simulate_mr_mash_data( w = 1, X_cor = 0, X_scale = 1, - V_cor = 0 + V_cor = 0, + seed = NULL ) } \arguments{ @@ -50,6 +51,10 @@ proportions associated to each mixture component.} \item{X_scale}{scalar indicating the diagonal value for Gamma.} \item{V_cor}{scalar indicating the positive correlation [0, 1] between residuals} + +\item{seed}{seed for random number generation used by \code{Rfast::rmvnorm}. +However, some computations will also need a general \code{set.seed()} to be +reproducible.} } \value{ A list with some or all of the @@ -82,11 +87,12 @@ Function to simulate data from \eqn{MN_{nxr}(XB, I, V)}, where \eqn{X \sim N_p(0 \eqn{B \sim \sum_k w_k N_r(0, Sigma_k)}, with \eqn{Gamma}, \eqn{w_k}, \eqn{Sigma_k}, and \eqn{V} defined by the user. } \examples{ +set.seed(1) dat <- simulate_mr_mash_data(n=50, p=40, p_causal=20, r=5, r_causal=list(1:2, 3:4), intercepts=rep(1, 5), pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1), w=c(0.5, 0.5), X_cor=0.5, X_scale=1, - V_cor=0) + V_cor=0, seed=1) } From 62d06025a748fab2b47578fbc24b10a50449ad0a Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 10:36:21 -0400 Subject: [PATCH 052/103] Additional improvements --- R/simulate_demo_data.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 4d412d2..814e530 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -34,8 +34,8 @@ #' @param V_cor scalar indicating the positive correlation [0, 1] between residuals #' #' @param seed seed for random number generation used by \code{Rfast::rmvnorm}. -#' However, some computations will also need a general \code{set.seed()} to be -#' reproducible. +#' and \code{Rfast::Rnorm}. However, some computations will also need a general +#' \code{set.seed()} to be reproducible. 
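# A minimal reproducibility sketch, using the arguments from the package
# example: the Rfast samplers take the `seed` argument, while steps that draw
# from R's own RNG (e.g. the sample() call that picks the causal variables)
# still need set.seed(), so both are set.
#   set.seed(1)
#   dat <- simulate_mr_mash_data(n=50, p=40, p_causal=20, r=5,
#                                r_causal=list(1:2, 3:4), intercepts=rep(1, 5),
#                                pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1),
#                                w=c(0.5, 0.5), X_cor=0.5, X_scale=1,
#                                V_cor=0, seed=1)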
#' #' @return A list with some or all of the #' following elements: @@ -62,7 +62,7 @@ #' \item{causal_vars_to_mixture_comps}{p_causal-vector of indexes indicating from which #' mixture components each causal effect comes.} #' -#' @importFrom Rfast rmvnorm +#' @importFrom Rfast rmvnorm Rnorm #' @importFrom MBSP matrix_normal #' @importFrom matrixStats colVars #' @@ -132,7 +132,7 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce diag(Gamma) <- X_scale X <- rmvnorm(n=n, mean=rep(0, p), sigma=Gamma, seed) } else { - X <- replicate(p, rnorm(n=n, mean=0, sd=sqrt(X_scale))) + X <- replicate(p, Rnorm(n=n, m=0, s=sqrt(X_scale), seed=seed)) } X <- scale_fast2(X, scale=FALSE)$M From 7cdcec3284ea8c212bfe58dca1a55792694ebc86 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 10:37:06 -0400 Subject: [PATCH 053/103] Run devtools::document() --- NAMESPACE | 1 + man/simulate_mr_mash_data.Rd | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a747fe4..3424d21 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -21,6 +21,7 @@ importFrom(Rcpp,evalCpp) importFrom(RcppParallel,RcppParallelLibs) importFrom(RcppParallel,defaultNumThreads) importFrom(RcppParallel,setThreadOptions) +importFrom(Rfast,Rnorm) importFrom(Rfast,is.symmetric) importFrom(Rfast,rmvnorm) importFrom(ebnm,ebnm_normal) diff --git a/man/simulate_mr_mash_data.Rd b/man/simulate_mr_mash_data.Rd index b380b0e..89fb37a 100644 --- a/man/simulate_mr_mash_data.Rd +++ b/man/simulate_mr_mash_data.Rd @@ -53,8 +53,8 @@ proportions associated to each mixture component.} \item{V_cor}{scalar indicating the positive correlation [0, 1] between residuals} \item{seed}{seed for random number generation used by \code{Rfast::rmvnorm}. -However, some computations will also need a general \code{set.seed()} to be -reproducible.} +and \code{Rfast::Rnorm}. 
However, some computations will also need a general +\code{set.seed()} to be reproducible.} } \value{ A list with some or all of the From 03b6b0ca8870ec54e629295ff141708b1188863e Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 10:42:12 -0400 Subject: [PATCH 054/103] Fix bugs --- R/simulate_demo_data.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 814e530..0345a52 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -114,12 +114,12 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce for(j in 1:p_causal){ comp_to_use <- mixcomps[j] r_causal_mix <- r_causal[[comp_to_use]] - B_causal[j, r_causal_mix] <- rmvnorm(n=1, mean=rep(0, length(r_causal_mix)), sigma=Sigma[[comp_to_use]], seed=seed) + B_causal[j, r_causal_mix] <- rmvnorm(n=1, mu=rep(0, length(r_causal_mix)), sigma=Sigma[[comp_to_use]], seed=seed) } } else { r_causal_length <- length(r_causal[[1]]) r_causal_index <- r_causal[[1]] - B_causal[, r_causal_index] <- rmvnorm(n=p_causal, mean=rep(0, r_causal_length), sigma=Sigma[[1]], seed=seed) + B_causal[, r_causal_index] <- rmvnorm(n=p_causal, mu=rep(0, r_causal_length), sigma=Sigma[[1]], seed=seed) } B <- matrix(0, ncol=r, nrow=p) causal_variables <- sample(x=(1:p), size=p_causal) @@ -130,7 +130,7 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce Gamma_offdiag <- X_scale*X_cor Gamma <- matrix(Gamma_offdiag, nrow=p, ncol=p) diag(Gamma) <- X_scale - X <- rmvnorm(n=n, mean=rep(0, p), sigma=Gamma, seed) + X <- rmvnorm(n=n, mu=rep(0, p), sigma=Gamma, seed) } else { X <- replicate(p, Rnorm(n=n, m=0, s=sqrt(X_scale), seed=seed)) } From 8d921f79071203d3e0a89b3826cfe27cdd77c07a Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 11:01:38 -0400 Subject: [PATCH 055/103] Handle covariance matrices that are not PD --- R/simulate_demo_data.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 0345a52..4e6938e 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -36,6 +36,9 @@ #' @param seed seed for random number generation used by \code{Rfast::rmvnorm}. #' and \code{Rfast::Rnorm}. However, some computations will also need a general #' \code{set.seed()} to be reproducible. +#' +#' @param e A small number to add to the diagonal elements of the +#' covariance matrices to make them positive definite. 
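# Why `e` is needed: with B_cor = 1 (as in the example) the effect covariance
# Sigma has every entry equal to B_scale, a rank-one matrix that chol() cannot
# factor. makePD() is an internal helper whose definition is not shown in this
# patch; per the description of `e`, a minimal stand-in would be:
#   make_pd_sketch <- function(S, e = 1e-8) S + diag(e, nrow(S))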
#' #' @return A list with some or all of the #' following elements: @@ -80,7 +83,7 @@ #' simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), intercepts=rep(1, r), pve=0.2, B_cor=1, B_scale=1, w=1, - X_cor=0, X_scale=1, V_cor=0, seed=NULL){ + X_cor=0, X_scale=1, V_cor=0, seed=NULL, e=1e-8){ ##Check that the inputs are correct if(length(intercepts)!=r) stop("intercepts must be of length equal to r.") @@ -107,6 +110,8 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce Sigma[[i]] <- matrix(Sigma_offdiag, nrow=r_mix_length, ncol=r_mix_length) diag(Sigma[[i]]) <- B_scale[i] } + Sigma <- lapply(Sigma, makePD, e=e) + #Sample effects from a mixture of MVN distributions or a single MVN distribution B_causal <- matrix(0, nrow=p_causal, ncol=r) if(K>1){ @@ -130,6 +135,7 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce Gamma_offdiag <- X_scale*X_cor Gamma <- matrix(Gamma_offdiag, nrow=p, ncol=p) diag(Gamma) <- X_scale + Gamma <- makePD(Gamma, e) X <- rmvnorm(n=n, mu=rep(0, p), sigma=Gamma, seed) } else { X <- replicate(p, Rnorm(n=n, m=0, s=sqrt(X_scale), seed=seed)) From 5cb2ecd693ffac9dc71945316280c98158ce667d Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 11:02:49 -0400 Subject: [PATCH 056/103] Run devtools::document() --- man/simulate_mr_mash_data.Rd | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/man/simulate_mr_mash_data.Rd b/man/simulate_mr_mash_data.Rd index 89fb37a..8ac16f0 100644 --- a/man/simulate_mr_mash_data.Rd +++ b/man/simulate_mr_mash_data.Rd @@ -18,7 +18,8 @@ simulate_mr_mash_data( X_cor = 0, X_scale = 1, V_cor = 0, - seed = NULL + seed = NULL, + e = 1e-08 ) } \arguments{ @@ -55,6 +56,9 @@ proportions associated to each mixture component.} \item{seed}{seed for random number generation used by \code{Rfast::rmvnorm}. and \code{Rfast::Rnorm}. 
However, some computations will also need a general \code{set.seed()} to be reproducible.} + +\item{e}{A small number to add to the diagonal elements of the +covariance matrices to make them positive definite.} } \value{ A list with some or all of the From 8caa78eb70bfab73a9bb2a7cd879e47b2d1099d6 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 11:03:21 -0400 Subject: [PATCH 057/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 6ae8647..e940063 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-4 +Version: 0.3-5 Date: 2023-03-16 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From c4455eeb40e9382145edb6ec1e3800b3ae6ca010 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 11:39:53 -0400 Subject: [PATCH 058/103] Fix issues with rng --- R/simulate_demo_data.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 4e6938e..97c99ff 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -65,7 +65,7 @@ #' \item{causal_vars_to_mixture_comps}{p_causal-vector of indexes indicating from which #' mixture components each causal effect comes.} #' -#' @importFrom Rfast rmvnorm Rnorm +#' @importFrom Rfast rmvnorm #' @importFrom MBSP matrix_normal #' @importFrom matrixStats colVars #' @@ -138,7 +138,7 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce Gamma <- makePD(Gamma, e) X <- rmvnorm(n=n, mu=rep(0, p), sigma=Gamma, seed) } else { - X <- replicate(p, Rnorm(n=n, m=0, s=sqrt(X_scale), seed=seed)) + X <- sapply(seed:(seed+(p-1)), sample_norm, n=n, m=0, s2=sqrt(X_scale)) } X <- scale_fast2(X, scale=FALSE)$M @@ -182,7 +182,12 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce } - +#' @importFrom Rfast Rnorm +sample_norm <- function(i, n, m, s2){ + x <- Rnorm(n=n, m=m, s=sqrt(s2), seed=i) + + return(x) +} From 4e512d48e230a9e52234cc9d19541b20f5c0b468 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 14:57:16 -0400 Subject: [PATCH 059/103] Add flag to deal with large matrices --- src/Makevars | 4 ++-- src/Makevars.win | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Makevars b/src/Makevars index 47251da..a4063af 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,5 @@ CXX_STD = CXX11 -PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 \ - -DARMA_NO_DEBUG -DARMA_DONT_USE_OPENMP -DARMA_USE_TBB_ALLOC +PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 -DARMA_64BIT_WORD \ + -DARMA_DONT_USE_OPENMP -DARMA_USE_TBB_ALLOC -DARMA_NO_DEBUG PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()") diff --git a/src/Makevars.win b/src/Makevars.win index fb8175a..d54863f 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -1,6 +1,7 @@ CXX_STD = CXX11 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 \ - -DARMA_NO_DEBUG -DARMA_DONT_USE_OPENMP \ - -DARMA_USE_TBB_ALLOC -DRCPP_PARALLEL_USE_TBB=1 + -DARMA_64BIT_WORD -DARMA_DONT_USE_OPENMP \ + -DARMA_USE_TBB_ALLOC -DRCPP_PARALLEL_USE_TBB=1 \ + -DARMA_NO_DEBUG PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e 
"RcppParallel::RcppParallelLibs()") From f828389254c6ded30ba75326a32469d1ce2cf89a Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 14:57:52 -0400 Subject: [PATCH 060/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e940063..19c3fb3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-5 -Date: 2023-03-16 +Version: 0.3-6 +Date: 2023-03-22 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. From d8fcccfef26f7f994395a0cfe8225cb462365308 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 16:27:43 -0400 Subject: [PATCH 061/103] Improve efficiency --- R/simulate_demo_data.R | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 97c99ff..539b34a 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -155,7 +155,7 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce V <- D %*% V_cor_mat %*% D ##Simulate Y from MN(XB, I_n, V) where I_n is an nxn identity matrix and V is the residual covariance - Y <- matrix_normal(G + matrix(intercepts, n, r, byrow=TRUE), diag(n), V) + Y <- matrix_normal_indep_rows(M=(G + matrix(intercepts, n, r, byrow=TRUE)), V=V, seed=seed) ##Compile output causal_responses <- r_causal @@ -189,6 +189,20 @@ sample_norm <- function(i, n, m, s2){ return(x) } +#' @importFrom Rfast matrnorm +matrix_normal_indep_rows = function(M, V, seed){ + a <- nrow(M) + b <- ncol(M) + + # Draw Z from MN(O, I, I) + Z <- matrnorm(n=a, p=b, seed=seed) + + # Cholesky decomposition of V + L2 <- chol(V) + + # Return draw from MN(M,I,V) + return(M + Z %*% L2) +} From 4998392d94a1ff59ac6755c142abc2124f03cf9a Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 16:28:32 -0400 Subject: [PATCH 062/103] Run devtools::document() --- NAMESPACE | 1 + 1 file changed, 1 insertion(+) diff --git a/NAMESPACE b/NAMESPACE index 3424d21..38d323b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ importFrom(RcppParallel,defaultNumThreads) importFrom(RcppParallel,setThreadOptions) importFrom(Rfast,Rnorm) importFrom(Rfast,is.symmetric) +importFrom(Rfast,matrnorm) importFrom(Rfast,rmvnorm) importFrom(ebnm,ebnm_normal) importFrom(ebnm,ebnm_normal_scale_mixture) From dcab9d5a01d6983131a226c6390560d87ed83a0c Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 22 Mar 2023 16:29:04 -0400 Subject: [PATCH 063/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 19c3fb3..2f3f9df 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-6 +Version: 0.3-7 Date: 2023-03-22 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From 39e7cafc7038dcfec8a52dfe0692b5a90e0a0609 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 24 Mar 2023 22:40:57 -0400 Subject: [PATCH 064/103] Add arma compilation flags --- src/Makevars | 3 ++- src/Makevars.win | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Makevars b/src/Makevars index a4063af..6596210 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,5 +1,6 @@ CXX_STD = CXX11 PKG_CXXFLAGS = 
$(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 -DARMA_64BIT_WORD \ - -DARMA_DONT_USE_OPENMP -DARMA_USE_TBB_ALLOC -DARMA_NO_DEBUG + -DARMA_DONT_USE_OPENMP -DARMA_USE_TBB_ALLOC -DARMA_NO_DEBUG \ + -DARMA_USE_BLAS PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()") diff --git a/src/Makevars.win b/src/Makevars.win index d54863f..2d2d16a 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -2,6 +2,6 @@ CXX_STD = CXX11 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 \ -DARMA_64BIT_WORD -DARMA_DONT_USE_OPENMP \ -DARMA_USE_TBB_ALLOC -DRCPP_PARALLEL_USE_TBB=1 \ - -DARMA_NO_DEBUG + -DARMA_NO_DEBUG -DARMA_USE_BLAS PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "RcppParallel::RcppParallelLibs()") From 0bd9d027f5d29f41326f6ed4a9437ba33cb99bf0 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 24 Mar 2023 22:42:49 -0400 Subject: [PATCH 065/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2f3f9df..ba546fb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-7 -Date: 2023-03-22 +Version: 0.3-8 +Date: 2023-03-24 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. From 63b7ba23aee635ab292c53311f245050cd68804d Mon Sep 17 00:00:00 2001 From: fmorgante Date: Sat, 25 Mar 2023 14:29:58 -0400 Subject: [PATCH 066/103] Add additional flags --- src/Makevars | 2 +- src/Makevars.win | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Makevars b/src/Makevars index 6596210..47ce9f4 100644 --- a/src/Makevars +++ b/src/Makevars @@ -1,6 +1,6 @@ CXX_STD = CXX11 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 -DARMA_64BIT_WORD \ -DARMA_DONT_USE_OPENMP -DARMA_USE_TBB_ALLOC -DARMA_NO_DEBUG \ - -DARMA_USE_BLAS + -DARMA_USE_BLAS -DARMA_USE_LAPACK PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ $(shell ${R_HOME}/bin/Rscript -e "RcppParallel::RcppParallelLibs()") diff --git a/src/Makevars.win b/src/Makevars.win index 2d2d16a..77b5b0c 100644 --- a/src/Makevars.win +++ b/src/Makevars.win @@ -2,6 +2,6 @@ CXX_STD = CXX11 PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DARMA_WARN_LEVEL=1 \ -DARMA_64BIT_WORD -DARMA_DONT_USE_OPENMP \ -DARMA_USE_TBB_ALLOC -DRCPP_PARALLEL_USE_TBB=1 \ - -DARMA_NO_DEBUG -DARMA_USE_BLAS + -DARMA_NO_DEBUG -DARMA_USE_BLAS -DARMA_USE_LAPACK PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) \ $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "RcppParallel::RcppParallelLibs()") From 86bba4b793f30415ee72f7f81dac3c17693a85cb Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 13:13:17 -0400 Subject: [PATCH 067/103] Fix computations --- R/mr_mash_rss_updates.R | 26 +++++++++++++++----------- src/mr_mash_updates.cpp | 36 ++++++++++++++++++++---------------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/R/mr_mash_rss_updates.R b/R/mr_mash_rss_updates.R index 94400c4..6282d33 100644 --- a/R/mr_mash_rss_updates.R +++ b/R/mr_mash_rss_updates.R @@ -1,5 +1,5 @@ ###Update variational parameters, expected residuals, and ELBO components with or without scaling X -inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, 
###note: V is only needed when not scaling X +inner_loop_general_rss_R <- function(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, ###note: V is only needed when not scaling X precomp_quants, standardize, compute_ELBO, update_V, update_order, eps){ ###Create variables to store quantities @@ -21,8 +21,9 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no xtx <- precomp_quants$xtx[j] } - #Remove j-th effect from expected residuals - xtRbar_j <- XtY[j, ] - XtXmu1[j, ] + xtx*mu1[j, ] + #Remove j-th effect from expected residuals + XtRbar <- XtRbar + outer(XtX[, j], mu1[j, ]) + xtRbar_j <- XtRbar[j, ] #Run Bayesian SLR if(standardize){ @@ -50,6 +51,9 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no if(update_V){ var_part_ERSS <- compute_var_part_ERSS(var_part_ERSS, bfit, xtx) } + + #Update expected residuals + XtRbar <- XtRbar - outer(XtX[, j], mu1[j, ]) } ###Return output @@ -71,11 +75,11 @@ inner_loop_general_rss_R <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, ###no #' @importFrom RcppParallel RcppParallelLibs #' @useDynLib mr.mash.alpha #' -inner_loop_general_rss_Rcpp <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, +inner_loop_general_rss_Rcpp <- function(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads){ - out <- inner_loop_general_rss_rcpp(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_rss_rcpp(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads) @@ -94,15 +98,15 @@ inner_loop_general_rss_Rcpp <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, pr } ###Wrapper of the inner loop with R or Rcpp -inner_loop_general_rss <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, +inner_loop_general_rss <- function(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, version, update_order, eps, nthreads){ if(version=="R"){ - out <- inner_loop_general_rss_R(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants, + out <- inner_loop_general_rss_R(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps) } else if(version=="Rcpp"){ update_order <- as.integer(update_order-1) - out <- inner_loop_general_rss_Rcpp(n, XtY, XtXmu1, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, + out <- inner_loop_general_rss_Rcpp(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads) } @@ -118,11 +122,11 @@ mr_mash_update_general_rss <- function(n, XtX, XtY, YtY, mu1_t, V, Vinv, ldetV, - ##Compute ?? 
- XtXmu1 <- XtX%*%mu1_t + ##Compute expected residuals + XtRbar <- XtY - XtX %*% mu1_t ##Update variational parameters, expected residuals, and ELBO components - updates <- inner_loop_general_rss(n=n, XtY=XtY, XtXmu1=XtXmu1, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, + updates <- inner_loop_general_rss(n=n, XtX=XtX, XtY=XtY, XtRbar=XtRbar, mu1=mu1_t, V=V, Vinv=Vinv, w0=w0, S0=S0, precomp_quants=precomp_quants, standardize=standardize, compute_ELBO=compute_ELBO, update_V=update_V, version=version, update_order=update_order, eps=eps, nthreads=nthreads) diff --git a/src/mr_mash_updates.cpp b/src/mr_mash_updates.cpp index 3244d2d..f359327 100644 --- a/src/mr_mash_updates.cpp +++ b/src/mr_mash_updates.cpp @@ -49,9 +49,9 @@ void impute_missing_Y (mat& Y, const mat& mu, const mat& Vinv, mat& Y_cov, double& sum_neg_ent_Y_miss); // Inner loop rss -void inner_loop_general_rss (unsigned int n, const mat& XtY, const mat& XtXmu1, mat& mu1, const mat& V, - const mat& Vinv, const vec& w0, const cube& S0, - const mr_mash_precomputed_quantities& precomp_quants, +void inner_loop_general_rss (unsigned int n, const mat& XtX, const mat& XtY, mat& XtRbar, + mat& mu1, const mat& V, const mat& Vinv, const vec& w0, + const cube& S0, const mr_mash_precomputed_quantities& precomp_quants, bool standardize, bool compute_ELBO, bool update_V, const vec& update_order, double eps, unsigned int nthreads, cube& S1, mat& w1, double& var_part_tr_wERSS, @@ -135,7 +135,7 @@ void inner_loop_general (const mat& X, mat& Rbar, mat& mu1, const mat& V, x = X.col(j); mu1_j = trans(mu1.row(j)); - // Disregard the ith predictor in the expected residuals. + // Disregard the jth predictor in the expected residuals. Rbar_j = Rbar + (x * trans(mu1_j)); // Update the posterior quantities for the jth @@ -250,8 +250,9 @@ void impute_missing_Y (mat& Y, const mat& mu, const mat& Vinv, // [[Rcpp::depends(RcppArmadillo)]] // [[Rcpp::depends(RcppParallel)]] // [[Rcpp::export]] -List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtY, const arma::mat& XtXmu1, arma::mat& mu1, - const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, +List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtX, const arma::mat& XtY, + arma::mat& XtRbar, arma::mat& mu1, const arma::mat& V, + const arma::mat& Vinv, const arma::vec& w0, const arma::cube& S0, const List& precomp_quants_list, bool standardize, bool compute_ELBO, bool update_V, const arma::vec& update_order, double eps, unsigned int nthreads) { @@ -273,7 +274,7 @@ List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtY, const ar as(precomp_quants_list["d"]), as(precomp_quants_list["QtimesV_chol"]), as(precomp_quants_list["xtx"])); - inner_loop_general_rss(n, XtY, XtXmu1, mu1_new, V, Vinv, w0, S0, precomp_quants, + inner_loop_general_rss(n, XtX, XtY, XtRbar, mu1_new, V, Vinv, w0, S0, precomp_quants, standardize, compute_ELBO, update_V, update_order, eps, nthreads, S1, w1, var_part_tr_wERSS, neg_KL, var_part_ERSS); return List::create(Named("mu1") = mu1_new, @@ -285,19 +286,19 @@ List inner_loop_general_rss_rcpp (unsigned int n, const arma::mat& XtY, const ar } // Perform the inner loop rss -void inner_loop_general_rss (unsigned int n, const mat& XtY, const mat& XtXmu1, mat& mu1, const mat& V, - const mat& Vinv, const vec& w0, const cube& S0, - const mr_mash_precomputed_quantities& precomp_quants, +void inner_loop_general_rss (unsigned int n, const mat& XtX, const mat& XtY, mat& XtRbar, + mat& mu1, const mat& V, const mat& Vinv, const vec& w0, + const cube& S0, 
const mr_mash_precomputed_quantities& precomp_quants, bool standardize, bool compute_ELBO, bool update_V, const vec& update_order, double eps, unsigned int nthreads, cube& S1, mat& w1, double& var_part_tr_wERSS, double& neg_KL, mat& var_part_ERSS) { + unsigned int p = mu1.n_rows; unsigned int r = mu1.n_cols; unsigned int k = w0.n_elem; + vec Xtx(p); vec XtRbar_j(r); vec mu1_j(r); - vec XtY_j(r); - vec XtXmu1_j(r); vec mu1_mix(r); mat S1_mix(r,r); vec w1_mix(k); @@ -319,12 +320,12 @@ void inner_loop_general_rss (unsigned int n, const mat& XtY, const mat& XtXmu1, else xtx_j = precomp_quants.xtx(j); + Xtx = XtX.col(j); mu1_j = trans(mu1.row(j)); - XtY_j = trans(XtY.row(j)); - XtXmu1_j = trans(XtXmu1.row(j)); - // Disregard the ith predictor in the expected residuals. - XtRbar_j = XtY_j - XtXmu1_j + xtx_j * mu1_j; + // Disregard the jth predictor in the expected residuals. + XtRbar += (Xtx * trans(mu1_j)); + XtRbar_j = trans(XtRbar.row(j)); // Update the posterior quantities for the jth // predictor. @@ -351,5 +352,8 @@ void inner_loop_general_rss (unsigned int n, const mat& XtY, const mat& XtXmu1, // Compute V parameters if (update_V) compute_var_part_ERSS(var_part_ERSS, S1_mix, xtx_j); + + // Update the expected residuals. + XtRbar -= (Xtx * trans(mu1_mix)); } } From df5f41c7608d38fe9e37d92bc1516925a182c1db Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 13:14:04 -0400 Subject: [PATCH 068/103] Run devtools::document() --- R/RcppExports.R | 4 ++-- src/RcppExports.cpp | 11 ++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index 7c60dc3..fb2c5af 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -9,8 +9,8 @@ impute_missing_Y_rcpp <- function(Y, mu, Vinv, miss, non_miss) { .Call('_mr_mash_alpha_impute_missing_Y_rcpp', PACKAGE = 'mr.mash.alpha', Y, mu, Vinv, miss, non_miss) } -inner_loop_general_rss_rcpp <- function(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads) { - .Call('_mr_mash_alpha_inner_loop_general_rss_rcpp', PACKAGE = 'mr.mash.alpha', n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads) +inner_loop_general_rss_rcpp <- function(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads) { + .Call('_mr_mash_alpha_inner_loop_general_rss_rcpp', PACKAGE = 'mr.mash.alpha', n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads) } scale_rcpp <- function(M, a, b) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index aa63683..5427160 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -51,14 +51,15 @@ BEGIN_RCPP END_RCPP } // inner_loop_general_rss_rcpp -List inner_loop_general_rss_rcpp(unsigned int n, const arma::mat& XtY, const arma::mat& XtXmu1, arma::mat& mu1, const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, const arma::cube& S0, const List& precomp_quants_list, bool standardize, bool compute_ELBO, bool update_V, const arma::vec& update_order, double eps, unsigned int nthreads); -RcppExport SEXP _mr_mash_alpha_inner_loop_general_rss_rcpp(SEXP nSEXP, SEXP XtYSEXP, SEXP XtXmu1SEXP, SEXP mu1SEXP, SEXP VSEXP, SEXP VinvSEXP, SEXP w0SEXP, SEXP S0SEXP, SEXP precomp_quants_listSEXP, SEXP standardizeSEXP, SEXP compute_ELBOSEXP, SEXP update_VSEXP, SEXP update_orderSEXP, SEXP epsSEXP, SEXP 
nthreadsSEXP) { +List inner_loop_general_rss_rcpp(unsigned int n, const arma::mat& XtX, const arma::mat& XtY, arma::mat& XtRbar, arma::mat& mu1, const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, const arma::cube& S0, const List& precomp_quants_list, bool standardize, bool compute_ELBO, bool update_V, const arma::vec& update_order, double eps, unsigned int nthreads); +RcppExport SEXP _mr_mash_alpha_inner_loop_general_rss_rcpp(SEXP nSEXP, SEXP XtXSEXP, SEXP XtYSEXP, SEXP XtRbarSEXP, SEXP mu1SEXP, SEXP VSEXP, SEXP VinvSEXP, SEXP w0SEXP, SEXP S0SEXP, SEXP precomp_quants_listSEXP, SEXP standardizeSEXP, SEXP compute_ELBOSEXP, SEXP update_VSEXP, SEXP update_orderSEXP, SEXP epsSEXP, SEXP nthreadsSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< unsigned int >::type n(nSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type XtX(XtXSEXP); Rcpp::traits::input_parameter< const arma::mat& >::type XtY(XtYSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type XtXmu1(XtXmu1SEXP); + Rcpp::traits::input_parameter< arma::mat& >::type XtRbar(XtRbarSEXP); Rcpp::traits::input_parameter< arma::mat& >::type mu1(mu1SEXP); Rcpp::traits::input_parameter< const arma::mat& >::type V(VSEXP); Rcpp::traits::input_parameter< const arma::mat& >::type Vinv(VinvSEXP); @@ -71,7 +72,7 @@ BEGIN_RCPP Rcpp::traits::input_parameter< const arma::vec& >::type update_order(update_orderSEXP); Rcpp::traits::input_parameter< double >::type eps(epsSEXP); Rcpp::traits::input_parameter< unsigned int >::type nthreads(nthreadsSEXP); - rcpp_result_gen = Rcpp::wrap(inner_loop_general_rss_rcpp(n, XtY, XtXmu1, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads)); + rcpp_result_gen = Rcpp::wrap(inner_loop_general_rss_rcpp(n, XtX, XtY, XtRbar, mu1, V, Vinv, w0, S0, precomp_quants_list, standardize, compute_ELBO, update_V, update_order, eps, nthreads)); return rcpp_result_gen; END_RCPP } @@ -138,7 +139,7 @@ END_RCPP static const R_CallMethodDef CallEntries[] = { {"_mr_mash_alpha_inner_loop_general_rcpp", (DL_FUNC) &_mr_mash_alpha_inner_loop_general_rcpp, 14}, {"_mr_mash_alpha_impute_missing_Y_rcpp", (DL_FUNC) &_mr_mash_alpha_impute_missing_Y_rcpp, 5}, - {"_mr_mash_alpha_inner_loop_general_rss_rcpp", (DL_FUNC) &_mr_mash_alpha_inner_loop_general_rss_rcpp, 15}, + {"_mr_mash_alpha_inner_loop_general_rss_rcpp", (DL_FUNC) &_mr_mash_alpha_inner_loop_general_rss_rcpp, 16}, {"_mr_mash_alpha_scale_rcpp", (DL_FUNC) &_mr_mash_alpha_scale_rcpp, 3}, {"_mr_mash_alpha_scale2_rcpp", (DL_FUNC) &_mr_mash_alpha_scale2_rcpp, 3}, {"_mr_mash_alpha_rescale_post_mean_covar_rcpp", (DL_FUNC) &_mr_mash_alpha_rescale_post_mean_covar_rcpp, 3}, From 93a5ba3bcb1143a38d200157a5c4e24f3a6cfd51 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 13:14:36 -0400 Subject: [PATCH 069/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ba546fb..500ae8d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-8 -Date: 2023-03-24 +Version: 0.3-9 +Date: 2023-03-30 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. 
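A note on the "Fix computations" change above: the rss updates now carry
running expected residual cross-products instead of a precomputed
XtX %*% mu1 term. A minimal sketch of the bookkeeping for coordinate j,
following the new R code:

    XtRbar <- XtY - XtX %*% mu1                   # before the loop, all effects in
    XtRbar <- XtRbar + outer(XtX[, j], mu1[j, ])  # take the j-th effect out
    # Bayesian multivariate simple regression on XtRbar[j, ] updates mu1[j, ]
    XtRbar <- XtRbar - outer(XtX[, j], mu1[j, ])  # put the updated effect back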
From 5b42823e047fd83b3f189aed0bee21aec90696ac Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 15:53:57 -0400 Subject: [PATCH 070/103] Add option for update order --- R/mr_mash_rss.R | 19 ++++++------ R/update_order_rss.R | 44 ++++++++++++++++++++++++++ src/update_order.cpp | 73 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 10 deletions(-) create mode 100644 R/update_order_rss.R diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index d8730b8..3a79041 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -74,7 +74,8 @@ #' prior matrices to improve numerical stability of the updates. #' #' @param ca_update_order The order with which coordinates are -#' updated. So far, "consecutive" is supported. +#' updated. So far, "consecutive", "decreasing_logBF", +#' "increasing_logBF" are supported. #' #' @param X_colmeans a p-vector of variable means. #' @@ -266,8 +267,6 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le stop("mu1_init must be a matrix.") if(convergence_criterion=="ELBO" && !compute_ELBO) stop("ELBO needs to be computed with convergence_criterion=\"ELBO\".") - if(ca_update_order!="consecutive") - stop("ca_update_order=\"consecutive\" is the only option with summary data for now.") # PRE-PROCESSING STEPS # -------------------- @@ -390,13 +389,13 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le ###Set the ordering of the coordinate ascent updates if(ca_update_order=="consecutive"){ update_order <- 1:p - } # else if(ca_update_order=="decreasing_logBF"){ - # update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, - # decreasing=TRUE, eps, nthreads) - # } else if(ca_update_order=="increasing_logBF"){ - # update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, - # decreasing=FALSE, eps, nthreads) - # } + } else if(ca_update_order=="decreasing_logBF"){ + update_order <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0, comps, standardize, version, + decreasing=TRUE, eps, nthreads) + } else if(ca_update_order=="increasing_logBF"){ + update_order <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0, comps, standardize, version, + decreasing=FALSE, eps, nthreads) + } if(verbose) cat("Done!\n") diff --git a/R/update_order_rss.R b/R/update_order_rss.R new file mode 100644 index 0000000..c2a6bbe --- /dev/null +++ b/R/update_order_rss.R @@ -0,0 +1,44 @@ +###Compute logbf from Bayesian multivariate simple regression with mixture prior +compute_logbf_rss_R <- function(n, XtY, V, Vinv, w0, S0, precomp_quants, standardize, eps, nthreads){ + p <- nrow(XtY) + logbf <- vector("numeric", p) + + for(j in 1:p){ + #Run Bayesian SLR + if(standardize){ + bfit <- bayes_mvr_mix_standardized_X_rss(n, XtY[j, ], w0, S0, precomp_quants$S, precomp_quants$S1, + precomp_quants$SplusS0_chol, precomp_quants$S_chol, eps) + } else { + bfit <- bayes_mvr_mix_centered_X_rss(XtY[j, ], V, w0, S0, precomp_quants$xtx[j], Vinv, + precomp_quants$V_chol, precomp_quants$d, + precomp_quants$QtimesV_chol, eps) + } + + logbf[j] <- bfit$logbf + } + + return(logbf) +} + +###Compute rank of logbf from Bayesian multivariate simple regression with mixture prior +compute_rank_variables_BFmix_rss <- function(n, XtY, V, Vinv, w0, S0, precomp_quants, standardize, version, + decreasing, eps, nthreads){ + if(version=="R"){ + logbfs <- compute_logbf_rss_R(n, XtY, V, Vinv, w0, S0, precomp_quants, standardize, eps) + } else 
if(version=="Rcpp"){ + logbfs <- compute_logbf_rss_rcpp(n, XtY, V, Vinv, w0, simplify2array_custom(S0), precomp_quants, + standardize, eps , nthreads) + logbfs <- drop(logbfs) + } + + if(decreasing){ + ##Negative sign is needed because rank() by default ranks from smallest to largest + ##while we want from largest to smallest + rank_variables_BFmix <- rank(-logbfs, ties.method="first", na.last="keep") + } else { + rank_variables_BFmix <- rank(logbfs, ties.method="first", na.last="keep") + } + + return(rank_variables_BFmix) +} + diff --git a/src/update_order.cpp b/src/update_order.cpp index d01b6e4..41bca8d 100644 --- a/src/update_order.cpp +++ b/src/update_order.cpp @@ -37,6 +37,12 @@ vec compute_logbf (const mat& X, const mat& Y, const mat& V, const mr_mash_precomputed_quantities& precomp_quants, bool standardize, double eps, unsigned int nthreads); +vec compute_logbf_rss (unsigned int n, const mat& XtY, const mat& V, + const mat& Vinv, const vec& w0, const cube& S0, + const mr_mash_precomputed_quantities& precomp_quants, + bool standardize, double eps, unsigned int nthreads); + + // FUNCTION DEFINITIONS // -------------------- // Compute logbf for each variable @@ -61,6 +67,7 @@ arma::vec compute_logbf_rcpp (const arma::mat& X, const arma::mat& Y, const arma return compute_logbf(X, Y, V, Vinv, w0, S0, precomp_quants, standardize, eps, nthreads); } + vec compute_logbf (const mat& X, const mat& Y, const mat& V, const mat& Vinv, const vec& w0, const cube& S0, const mr_mash_precomputed_quantities& precomp_quants, @@ -101,3 +108,69 @@ vec compute_logbf (const mat& X, const mat& Y, const mat& V, return logbf_mix_all; } + + +// Compute logbf for each variable from summary data only +// [[Rcpp::depends(RcppArmadillo)]] +// [[Rcpp::export]] +arma::vec compute_logbf_rss_rcpp (unsigned int n, const arma::mat& XtY, const arma::mat& V, + const arma::mat& Vinv, const arma::vec& w0, const arma::cube& S0, + const List& precomp_quants_list, + bool standardize, double eps, unsigned int nthreads) { + + mr_mash_precomputed_quantities precomp_quants + (as(precomp_quants_list["S"]), + as(precomp_quants_list["S_chol"]), + as(precomp_quants_list["S1"]), + as(precomp_quants_list["SplusS0_chol"]), + as(precomp_quants_list["V_chol"]), + as(precomp_quants_list["d"]), + as(precomp_quants_list["QtimesV_chol"]), + as(precomp_quants_list["xtx"])); + + + return compute_logbf_rss(n, XtY, V, Vinv, w0, S0, precomp_quants, standardize, eps, nthreads); +} + + +vec compute_logbf_rss (unsigned int n, const mat& XtY, const mat& V, + const mat& Vinv, const vec& w0, const cube& S0, + const mr_mash_precomputed_quantities& precomp_quants, + bool standardize, double eps, unsigned int nthreads){ + + unsigned int p = XtY.n_rows; + unsigned int r = XtY.n_cols; + unsigned int k = S0.n_slices; + vec XtY_j(r); + vec mu1_mix(r); + mat S1_mix(r,r); + vec w1_mix(k); + double logbf_mix; + vec logbf_mix_all(p); + + // Repeat for each predictor. + for (unsigned int j = 0; j < p; j++) { + + XtY_j = trans(XtY.row(j)); + + // Update the posterior quantities for the jth + // predictor. 
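    // Note: these single-variable mixture logBFs are only used to set the
    // coordinate ascent update order; on the R side,
    // compute_rank_variables_BFmix_rss() takes rank(-logbf) for
    // "decreasing_logBF" and rank(logbf) for "increasing_logBF".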
+ if (standardize) + logbf_mix = bayes_mvr_mix_standardized_X_rss(n, XtY_j, w0, S0, precomp_quants.S, + precomp_quants.S1, + precomp_quants.SplusS0_chol, + precomp_quants.S_chol, eps, nthreads, + mu1_mix, S1_mix, w1_mix); + else { + double xtx_j = precomp_quants.xtx(j); + logbf_mix = bayes_mvr_mix_centered_X_rss(XtY_j, V, w0, S0, xtx_j, Vinv, + precomp_quants.V_chol, precomp_quants.d, + precomp_quants.QtimesV_chol, eps, nthreads, + mu1_mix, S1_mix, w1_mix); + } + + logbf_mix_all(j) = logbf_mix; + } + + return logbf_mix_all; +} From fe5cf36fe342d4ed3d359f27e1c63fb61722bc60 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 15:54:29 -0400 Subject: [PATCH 071/103] Run devtools::document() --- R/RcppExports.R | 4 ++++ man/mr.mash.rss.Rd | 3 ++- src/RcppExports.cpp | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/R/RcppExports.R b/R/RcppExports.R index fb2c5af..bae2e1d 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -29,3 +29,7 @@ compute_logbf_rcpp <- function(X, Y, V, Vinv, w0, S0, precomp_quants_list, stand .Call('_mr_mash_alpha_compute_logbf_rcpp', PACKAGE = 'mr.mash.alpha', X, Y, V, Vinv, w0, S0, precomp_quants_list, standardize, eps, nthreads) } +compute_logbf_rss_rcpp <- function(n, XtY, V, Vinv, w0, S0, precomp_quants_list, standardize, eps, nthreads) { + .Call('_mr_mash_alpha_compute_logbf_rss_rcpp', PACKAGE = 'mr.mash.alpha', n, XtY, V, Vinv, w0, S0, precomp_quants_list, standardize, eps, nthreads) +} + diff --git a/man/mr.mash.rss.Rd b/man/mr.mash.rss.Rd index cae2a75..73c8efb 100644 --- a/man/mr.mash.rss.Rd +++ b/man/mr.mash.rss.Rd @@ -108,7 +108,8 @@ coordinate ascent updates.} prior matrices to improve numerical stability of the updates.} \item{ca_update_order}{The order with which coordinates are -updated. So far, "consecutive" is supported.} +updated. 
So far, "consecutive", "decreasing_logBF", +"increasing_logBF" are supported.} \item{X_colmeans}{a p-vector of variable means.} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 5427160..6b5fae5 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -135,6 +135,26 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// compute_logbf_rss_rcpp +arma::vec compute_logbf_rss_rcpp(unsigned int n, const arma::mat& XtY, const arma::mat& V, const arma::mat& Vinv, const arma::vec& w0, const arma::cube& S0, const List& precomp_quants_list, bool standardize, double eps, unsigned int nthreads); +RcppExport SEXP _mr_mash_alpha_compute_logbf_rss_rcpp(SEXP nSEXP, SEXP XtYSEXP, SEXP VSEXP, SEXP VinvSEXP, SEXP w0SEXP, SEXP S0SEXP, SEXP precomp_quants_listSEXP, SEXP standardizeSEXP, SEXP epsSEXP, SEXP nthreadsSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< unsigned int >::type n(nSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type XtY(XtYSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type V(VSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type Vinv(VinvSEXP); + Rcpp::traits::input_parameter< const arma::vec& >::type w0(w0SEXP); + Rcpp::traits::input_parameter< const arma::cube& >::type S0(S0SEXP); + Rcpp::traits::input_parameter< const List& >::type precomp_quants_list(precomp_quants_listSEXP); + Rcpp::traits::input_parameter< bool >::type standardize(standardizeSEXP); + Rcpp::traits::input_parameter< double >::type eps(epsSEXP); + Rcpp::traits::input_parameter< unsigned int >::type nthreads(nthreadsSEXP); + rcpp_result_gen = Rcpp::wrap(compute_logbf_rss_rcpp(n, XtY, V, Vinv, w0, S0, precomp_quants_list, standardize, eps, nthreads)); + return rcpp_result_gen; +END_RCPP +} static const R_CallMethodDef CallEntries[] = { {"_mr_mash_alpha_inner_loop_general_rcpp", (DL_FUNC) &_mr_mash_alpha_inner_loop_general_rcpp, 14}, @@ -144,6 +164,7 @@ static const R_CallMethodDef CallEntries[] = { {"_mr_mash_alpha_scale2_rcpp", (DL_FUNC) &_mr_mash_alpha_scale2_rcpp, 3}, {"_mr_mash_alpha_rescale_post_mean_covar_rcpp", (DL_FUNC) &_mr_mash_alpha_rescale_post_mean_covar_rcpp, 3}, {"_mr_mash_alpha_compute_logbf_rcpp", (DL_FUNC) &_mr_mash_alpha_compute_logbf_rcpp, 10}, + {"_mr_mash_alpha_compute_logbf_rss_rcpp", (DL_FUNC) &_mr_mash_alpha_compute_logbf_rss_rcpp, 10}, {NULL, NULL, 0} }; From f850a79206293f2cc43f32312a53ee356c9c88fa Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 15:54:59 -0400 Subject: [PATCH 072/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 500ae8d..be632a1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-9 +Version: 0.3-10 Date: 2023-03-30 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From b987f1f076b117673c98ac42ece874abb09750bb Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 16:07:17 -0400 Subject: [PATCH 073/103] Add option for a random order of updates --- R/mr_mash.R | 7 ++++--- R/mr_mash_rss.R | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/R/mr_mash.R b/R/mr_mash.R index ef5f946..7c503e6 100644 --- a/R/mr_mash.R +++ b/R/mr_mash.R @@ -63,7 +63,7 @@ #' #' @param ca_update_order The order with which coordinates are #' updated. 
So far, "consecutive", "decreasing_logBF", -#' "increasing_logBF" are supported. +#' "increasing_logBF", "random" are supported. #' #' @param nthreads Number of RcppParallel threads to use for the #' updates. When \code{nthreads} is \code{NA}, the default number of @@ -166,7 +166,7 @@ mr.mash <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("Rcpp", "R"), e=1e-8, - ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF"), + ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF", "random"), nthreads=as.integer(NA)) { if(verbose){ @@ -356,7 +356,8 @@ mr.mash <- function(X, Y, S0, w0=rep(1/(length(S0)), length(S0)), V=NULL, } else if(ca_update_order=="increasing_logBF"){ update_order <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0, comps, standardize, version, decreasing=FALSE, eps, nthreads) - } + } else if(ca_update_order=="random") + update_order <- sample(x=1:p, size=p) if(!Y_has_missing){ Y_cov <- matrix(0, nrow=r, ncol=r) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 3a79041..beb4274 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -75,7 +75,7 @@ #' #' @param ca_update_order The order with which coordinates are #' updated. So far, "consecutive", "decreasing_logBF", -#' "increasing_logBF" are supported. +#' "increasing_logBF", "random" are supported. #' #' @param X_colmeans a p-vector of variable means. #' @@ -182,7 +182,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le max_iter=5000, update_w0=TRUE, update_w0_method="EM", w0_threshold=0, compute_ELBO=TRUE, standardize=TRUE, verbose=TRUE, update_V=FALSE, update_V_method=c("full", "diagonal"), version=c("Rcpp", "R"), e=1e-8, - ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF"), + ca_update_order=c("consecutive", "decreasing_logBF", "increasing_logBF", "random"), X_colmeans=NULL, Y_colmeans=NULL, check_R=TRUE, R_tol=1e-08, nthreads=as.integer(NA)) { @@ -395,7 +395,8 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le } else if(ca_update_order=="increasing_logBF"){ update_order <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0, comps, standardize, version, decreasing=FALSE, eps, nthreads) - } + } else if(ca_update_order=="random") + update_order <- sample(x=1:p, size=p) if(verbose) cat("Done!\n") From 32de28a033f095a3103ed325078ed05656962849 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 16:08:11 -0400 Subject: [PATCH 074/103] Run devtools::document --- man/mr.mash.Rd | 4 ++-- man/mr.mash.rss.Rd | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/man/mr.mash.Rd b/man/mr.mash.Rd index c993c9c..8d6cb59 100644 --- a/man/mr.mash.Rd +++ b/man/mr.mash.Rd @@ -24,7 +24,7 @@ mr.mash( update_V_method = c("full", "diagonal"), version = c("Rcpp", "R"), e = 1e-08, - ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF"), + ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF", "random"), nthreads = as.integer(NA) ) } @@ -89,7 +89,7 @@ prior matrices to improve numerical stability of the updates.} \item{ca_update_order}{The order with which coordinates are updated. 
So far, "consecutive", "decreasing_logBF", -"increasing_logBF" are supported.} +"increasing_logBF", "random" are supported.} \item{nthreads}{Number of RcppParallel threads to use for the updates. When \code{nthreads} is \code{NA}, the default number of diff --git a/man/mr.mash.rss.Rd b/man/mr.mash.rss.Rd index 73c8efb..496139a 100644 --- a/man/mr.mash.rss.Rd +++ b/man/mr.mash.rss.Rd @@ -29,7 +29,7 @@ mr.mash.rss( update_V_method = c("full", "diagonal"), version = c("Rcpp", "R"), e = 1e-08, - ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF"), + ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF", "random"), X_colmeans = NULL, Y_colmeans = NULL, check_R = TRUE, @@ -109,7 +109,7 @@ prior matrices to improve numerical stability of the updates.} \item{ca_update_order}{The order with which coordinates are updated. So far, "consecutive", "decreasing_logBF", -"increasing_logBF" are supported.} +"increasing_logBF", "random" are supported.} \item{X_colmeans}{a p-vector of variable means.} From bcf658a1d9746828257ce7309108c4a5f2931d0e Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 30 Mar 2023 16:08:43 -0400 Subject: [PATCH 075/103] Bump up version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index be632a1..dac8673 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-10 +Version: 0.3-11 Date: 2023-03-30 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate From 6b153b08e1128685e06adc356fdcdd2d85612f16 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 31 Mar 2023 12:31:38 -0400 Subject: [PATCH 076/103] Add some rss version unit tests --- ...t_compute_rank_variables_BFmix_R_vs_Rcpp.R | 18 +++ tests/testthat/test_mr.mash_multithreading.R | 22 ++++ tests/testthat/test_mr.mash_vs_mr.mash.rss.R | 115 ++++++++++++++++++ 3 files changed, 155 insertions(+) create mode 100644 tests/testthat/test_mr.mash_vs_mr.mash.rss.R diff --git a/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R b/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R index 4f584e2..1b75f18 100644 --- a/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R +++ b/tests/testthat/test_compute_rank_variables_BFmix_R_vs_Rcpp.R @@ -34,6 +34,9 @@ test_that("R and Rcpp version of compute_rank_variables_BFmix return the same re ###Compute the inverse of V Vinv <- solve(V) + ###Compute quantities needed for rss version + XtY <- crossprod(X, scale(Y, center=TRUE, scale=FALSE)) + ###Set eps eps <- .Machine$double.eps @@ -43,7 +46,12 @@ test_that("R and Rcpp version of compute_rank_variables_BFmix return the same re comps_rcpp <- precompute_quants(n, V, S0mix, standardize=TRUE, version="Rcpp") ranks_rcpp <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0mix, comps_rcpp, standardize=TRUE, version="Rcpp", decreasing=TRUE, eps, nthreads=1) + + ranks_rss_r <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0mix, comps_r, standardize=TRUE, version="R", decreasing=TRUE, eps) + + ranks_rss_rcpp <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0mix, comps_rcpp, standardize=TRUE, version="Rcpp", decreasing=TRUE, eps, nthreads=1) + ###Compute logbf with standardize=FALSE comps1_r <- precompute_quants(n, V, S0mix, standardize=FALSE, version="R") comps1_r$xtx <- colSums(X^2) @@ -52,8 +60,18 @@ test_that("R and Rcpp version of compute_rank_variables_BFmix 
return the same re comps1_rcpp <- precompute_quants(n, V, S0mix, standardize=FALSE, version="Rcpp") comps1_rcpp$xtx <- colSums(X^2) ranks1_rcpp <- compute_rank_variables_BFmix(X, Y, V, Vinv, w0, S0mix, comps1_rcpp, standardize=FALSE, version="Rcpp", decreasing=TRUE, eps, nthreads=1) + + ranks1_rss_r <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0mix, comps1_r, standardize=FALSE, version="R", decreasing=TRUE, eps) + ranks1_rss_rcpp <- compute_rank_variables_BFmix_rss(n, XtY, V, Vinv, w0, S0mix, comps1_rcpp, standardize=FALSE, version="Rcpp", decreasing=TRUE, eps, nthreads=1) + + ###Tests expect_equal(ranks_r, ranks_rcpp, tolerance = 1e-10, scale = 1) expect_equal(ranks1_r, ranks1_rcpp, tolerance = 1e-10, scale = 1) + expect_equal(ranks_rss_r, ranks_rss_rcpp, tolerance = 1e-10, scale = 1) + expect_equal(ranks1_rss_r, ranks1_rss_rcpp, tolerance = 1e-10, scale = 1) + expect_equal(ranks_rss_rcpp, ranks_rcpp, tolerance = 1e-10, scale = 1) + expect_equal(ranks1_rss_rcpp, ranks1_rcpp, tolerance = 1e-10, scale = 1) + }) \ No newline at end of file diff --git a/tests/testthat/test_mr.mash_multithreading.R b/tests/testthat/test_mr.mash_multithreading.R index 7f927b0..4a2bbfa 100644 --- a/tests/testthat/test_mr.mash_multithreading.R +++ b/tests/testthat/test_mr.mash_multithreading.R @@ -42,6 +42,12 @@ test_that("mr.mash with 1 or 2 thread(s) return the same results", { ###Estimate residual covariance V_est <- cov(Y) + ###Compute quantities needed for mr.mash.rss + out <- compute_univariate_sumstats(X=X, Y=Y, standardize=FALSE, standardize.response=FALSE, mc.cores=1) + R <- cor(X) + X_colMeans <- colMeans(X) + Y_colMeans <- colMeans(Y) + ###Fit with current implementation (1 thread) capture.output( fit_1 <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, @@ -56,6 +62,14 @@ test_that("mr.mash with 1 or 2 thread(s) return the same results", { fit_1_miss$progress <- fit_1_miss$progress[, -2] ##This line is needed to remove the timing column --> ##hopefully faster when using multiple threads + fit_1_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, standardize=TRUE, + verbose=FALSE, update_V=TRUE, X_colmeans=X_colMeans, Y_colmeans=Y_colMeans, + nthreads=1) + fit_1_rss$progress <- fit_1_rss$progress[, -2] ##This line is needed to remove the timing column --> + ##hopefully faster when using multiple threads + + ###Fit with current implementation (2 threads) capture.output( @@ -70,6 +84,14 @@ test_that("mr.mash with 1 or 2 thread(s) return the same results", { verbose=FALSE, update_V=TRUE, nthreads=2)) fit_2_miss$progress <- fit_2_miss$progress[, -2] + fit_2_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, standardize=TRUE, + verbose=FALSE, update_V=TRUE, X_colmeans=X_colMeans, Y_colmeans=Y_colMeans, + nthreads=2) + fit_2_rss$progress <- fit_2_rss$progress[, -2] ##This line is needed to remove the timing column --> + ##hopefully faster when using multiple threads + + ###Test expect_equal(fit_1, fit_2, tolerance=1e-10, scale=1) diff --git a/tests/testthat/test_mr.mash_vs_mr.mash.rss.R b/tests/testthat/test_mr.mash_vs_mr.mash.rss.R new file mode 100644 index 0000000..80a3173 --- /dev/null +++ b/tests/testthat/test_mr.mash_vs_mr.mash.rss.R @@ -0,0 +1,115 @@ +context("mr.mash and mr.mash.rss versions return same result") + +test_that("mr.mash and mr.mash.rss return the same results", { + ###Set seed + set.seed(123) + + ###Simulate X 
and Y + n <- 100 + p <- 10 + + ###Set residual covariance + V <- rbind(c(1.0,0.2), + c(0.2,0.4)) + + ###Set true effects + B <- matrix(c(-2, -2, + 5, 5, + rep(0, (p-2)*2)), byrow=TRUE, ncol=2) + + ###Simulate X + X <- matrix(rnorm(n*p), nrow=n, ncol=p) + X <- scale(X, center=TRUE, scale=FALSE) + + ###Simulate Y from MN(XB, I_n, V) where I_n is an nxn identity + ###matrix and V is the residual covariance + Y <- sim_mvr(X, B, V) + + ###Specify the mixture weights and covariance matrices for the + ###mixture-of-normals prior + grid <- seq(1, 5) + S0mix <- compute_cov_canonical(ncol(Y), singletons=TRUE, + hetgrid=c(0, 0.25, 0.5, 0.75, 0.99), grid, + zeromat=TRUE) + + w0 <- rep(1/(length(S0mix)), length(S0mix)) + + ###Estimate residual covariance + V_est <- cov(Y) + + ###Fit mr.mash + fit <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, + update_w0_method="EM", compute_ELBO=TRUE, standardize=FALSE, + verbose=FALSE, update_V=FALSE) + fit$progress <- fit$fitted <- fit$pve <- fit$G <- NULL + + fit_scaled <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, + update_w0_method="EM", compute_ELBO=TRUE, + standardize=TRUE, verbose=FALSE, update_V=FALSE) + fit_scaled$progress <- fit_scaled$fitted <- fit_scaled$pve <- fit_scaled$G <- NULL + + fit_V <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, + update_w0_method="EM", compute_ELBO=TRUE, + standardize=FALSE, verbose=FALSE, update_V=TRUE) + fit_V$progress <- fit_V$fitted <- fit_V$pve <- fit_V$G <- NULL + + fit_scaled_V <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, + update_w0_method="EM", compute_ELBO=TRUE, + standardize=TRUE, verbose=FALSE, update_V=TRUE) + fit_scaled_V$progress <- fit_scaled_V$fitted <- fit_scaled_V$pve <- fit_scaled_V$G <- NULL + + fit_scaled_V_declogBF <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, + update_w0_method="EM", compute_ELBO=TRUE, + standardize=TRUE, verbose=FALSE, update_V=TRUE, + ca_update_order="decreasing_logBF") + fit_scaled_V_declogBF$progress <- fit_scaled_V_declogBF$fitted <- fit_scaled_V_declogBF$pve <- fit_scaled_V_declogBF$G <- NULL + + + ###Fit mr.mash.rss + out <- compute_univariate_sumstats(X=X, Y=Y, standardize=FALSE, standardize.response=FALSE, mc.cores=1) + R <- cor(X) + X_colMeans <- colMeans(X) + Y_colMeans <- colMeans(Y) + + fit_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, standardize=FALSE, + verbose=FALSE, update_V=FALSE, X_colmeans=X_colMeans, Y_colmeans=Y_colMeans) + fit_rss$progress <- NULL + + fit_scaled_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, + standardize=TRUE, verbose=FALSE, update_V=FALSE, + X_colmeans=X_colMeans, Y_colmeans=Y_colMeans) + fit_scaled_rss$progress <- NULL + + fit_V_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, + standardize=FALSE, verbose=FALSE, update_V=TRUE, + X_colmeans=X_colMeans, Y_colmeans=Y_colMeans) + fit_V_rss$progress <- NULL + + + fit_scaled_V_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, + standardize=TRUE, verbose=FALSE, update_V=TRUE, + X_colmeans=X_colMeans, Y_colmeans=Y_colMeans) + fit_scaled_V_rss$progress <- NULL + + + fit_scaled_V_declogBF_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, + standardize=TRUE, 
verbose=FALSE, update_V=TRUE, + ca_update_order="decreasing_logBF", + X_colmeans=X_colMeans, Y_colmeans=Y_colMeans) + fit_scaled_V_declogBF_rss$progress <- NULL + + + + + ###Tests + expect_equal(unclass(fit), unclass(fit_rss), tolerance=1e-10, scale=1) + expect_equal(unclass(fit_scaled), unclass(fit_scaled_rss), tolerance=1e-10, scale=1) + expect_equal(unclass(fit_V), unclass(fit_V_rss), tolerance=1e-10, scale=1) + expect_equal(unclass(fit_scaled_V), unclass(fit_scaled_V_rss), tolerance=1e-10, scale=1) + expect_equal(unclass(fit_scaled_V_declogBF), unclass(fit_scaled_V_declogBF_rss), tolerance=1e-10, scale=1) +}) From e2dc8acc7a0d88468511d995636492959d136348 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 31 Mar 2023 12:32:04 -0400 Subject: [PATCH 077/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index dac8673..90d8fa0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-11 -Date: 2023-03-30 +Version: 0.3-12 +Date: 2023-03-31 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. From 230d4bd825e1c5a8ddaaf9780fb05ae201695634 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Fri, 31 Mar 2023 17:04:19 -0400 Subject: [PATCH 078/103] Update README and run pkgdown::build_site() --- README.md | 4 +- docs/404.html | 6 +- docs/LICENSE-text.html | 8 +- docs/authors.html | 20 +- docs/index.html | 25 +- docs/pkgdown.yml | 6 +- docs/reference/autoselect.mixsd.html | 15 +- docs/reference/coef.mr.mash.html | 17 +- docs/reference/coef.mr.mash.rss.html | 98 +++++ docs/reference/compute_canonical_covs.html | 25 +- docs/reference/compute_data_driven_covs.html | 37 +- .../compute_univariate_sumstats.html | 36 +- docs/reference/expand_covs.html | 17 +- docs/reference/index.html | 19 +- docs/reference/mr.mash.html | 408 +++++++----------- docs/reference/mr.mash.rss.html | 384 +++++++++++++++++ docs/reference/predict.mr.mash.html | 19 +- docs/reference/predict.mr.mash.rss.html | 101 +++++ docs/reference/simulate_mr_mash_data.html | 112 +++-- docs/sitemap.xml | 9 + 20 files changed, 1001 insertions(+), 365 deletions(-) create mode 100644 docs/reference/coef.mr.mash.rss.html create mode 100644 docs/reference/mr.mash.rss.html create mode 100644 docs/reference/predict.mr.mash.rss.html diff --git a/README.md b/README.md index 9745ddc..d45361a 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,8 @@ repository useful for your work, please cite: ## License -Copyright (c) 2020-2022, Fabio Morgante, Peter Carbonetto and Matthew -Stephens. +Copyright (c) 2020-2023, Fabio Morgante, Deborah Kunkel, Peter Carbonetto +and Matthew Stephens. All source code and software in this repository are made available under the terms of the [MIT license][mit-license]. diff --git a/docs/404.html b/docs/404.html index 06f5ec5..f712146 100644 --- a/docs/404.html +++ b/docs/404.html @@ -32,7 +32,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -80,12 +80,12 @@

Page not found (404)

-

Site built with pkgdown 2.0.2.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 6891c66..4ac8e86 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -44,7 +44,7 @@

License

YEAR: 2020
-COPYRIGHT HOLDER: Fabio Morgante, Peter Carbonetto, Matthew Stephens
+COPYRIGHT HOLDER: Fabio Morgante, Deborah Kunkel, Peter Carbonetto, Matthew Stephens
 
@@ -58,11 +58,11 @@

License

-

Site built with pkgdown 2.0.2.

+

Site built with pkgdown 2.0.7.

diff --git a/docs/authors.html b/docs/authors.html index 48c1e81..2a099b4 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -49,6 +49,10 @@

Authors

Fabio Morgante. Maintainer, author.

+
  • +

    Deborah Kunkel. Author. +

    +
  • Peter Carbonetto. Author.

    @@ -66,15 +70,15 @@

    Citation

    -

    Morgante F, Carbonetto P, Stephens M (2022). +

    Morgante F, Kunkel D, Carbonetto P, Stephens M (2023). mr.mash.alpha: Multiple Regression with Multivariate Adaptive Shrinkage. -R package version 0.2-21, https://github.com/stephenslab/mr.mash.alpha. +R package version 0.3-12, https://github.com/stephenslab/mr.mash.alpha.

    @Manual{,
       title = {mr.mash.alpha: Multiple Regression with Multivariate Adaptive Shrinkage},
    -  author = {Fabio Morgante and Peter Carbonetto and Matthew Stephens},
    -  year = {2022},
    -  note = {R package version 0.2-21},
    +  author = {Fabio Morgante and Deborah Kunkel and Peter Carbonetto and Matthew Stephens},
    +  year = {2023},
    +  note = {R package version 0.3-12},
       url = {https://github.com/stephenslab/mr.mash.alpha},
     }
    @@ -85,11 +89,11 @@

    Citation

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/index.html b/docs/index.html index 30145b8..2b2425d 100644 --- a/docs/index.html +++ b/docs/index.html @@ -34,7 +34,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -64,19 +64,31 @@
    - +
    +

    Citing this work +

    +

    If you find the mr.mash.alpha package or any of the source code in this repository useful for your work, please cite: > Morgante, F., Carbonetto, P., Wang, G., Zou, Y., Sarkar, A. & > Stephens, M. (2022). A flexible empirical Bayes approach to > multivariate multiple regression, and its improved accuracy > in predicting multi-tissue gene expression from genotypes. > bioRxiv https://doi.org/10.1101/2022.11.22.517471

    +
    +
    +

    License +

    +

    Copyright (c) 2020-2023, Fabio Morgante, Deborah Kunkel, Peter Carbonetto and Matthew Stephens.

    +

    All source code and software in this repository are made available under the terms of the MIT license.

    +
    @@ -109,6 +121,7 @@

    Citation

    Developers

    • Fabio Morgante
      Maintainer, author
    • +
    • Deborah Kunkel
      Author
    • Peter Carbonetto
      Author
    • Matthew Stephens
      Author
    @@ -122,12 +135,12 @@

    Developers

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 94ff745..595982a 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,6 +1,6 @@ -pandoc: 2.3.1 -pkgdown: 2.0.2 +pandoc: 3.1.1 +pkgdown: 2.0.7 pkgdown_sha: ~ articles: {} -last_built: 2022-04-14T19:26Z +last_built: 2023-03-31T21:00Z diff --git a/docs/reference/autoselect.mixsd.html b/docs/reference/autoselect.mixsd.html index 5755e42..d720506 100644 --- a/docs/reference/autoselect.mixsd.html +++ b/docs/reference/autoselect.mixsd.html @@ -18,7 +18,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12
    @@ -52,7 +52,7 @@

    Compute a grid of standard deviations to scale the canonical covariance matrices
    -
    autoselect.mixsd(data, mult = 2)
    +
    autoselect.mixsd(data, mult = 2)
    @@ -60,12 +60,17 @@

    Arguments

    data

    a list with two elements. 1 - Bhat, a numeric vector of regression coefficients. 2 - Shat, a numeric vector of standard errors for the regression coefficients.

    + +
    mult

    a scalar affecting how dense the resulting grid of standard deviations will be.

    +

    Value

    -

    A numeric vector of standard deviations.

    + + +

    A numeric vector of standard deviations.
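    A minimal sketch of how this grid is typically built and used, assuming X and Y are the covariate and response matrices from the mr.mash examples (names are illustrative):
    univ_sumstats <- compute_univariate_sumstats(X, Y, standardize = TRUE,
                                                 standardize.response = FALSE)
    # Square the selected standard deviations to obtain a grid of variances
    # that can be passed to expand_covs(), as in the mr.mash example.
    grid <- autoselect.mixsd(univ_sumstats, mult = sqrt(2))^2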

    @@ -76,11 +81,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/coef.mr.mash.html b/docs/reference/coef.mr.mash.html index dceed6d..a09264b 100644 --- a/docs/reference/coef.mr.mash.html +++ b/docs/reference/coef.mr.mash.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -50,20 +50,25 @@

    Extract coefficients from mr.mash fit.

    -
    # S3 method for mr.mash
    -coef(object, ...)
    +
    # S3 method for mr.mash
    +coef(object, ...)

    Arguments

    object

    a mr.mash fit.

    + +
    ...

    Other arguments (not used).

    +

    Value

    -

    (p+1) x r matrix of coefficients.

    + + +

    (p+1) x r matrix of coefficients.

    @@ -74,11 +79,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/coef.mr.mash.rss.html b/docs/reference/coef.mr.mash.rss.html new file mode 100644 index 0000000..5cc1b78 --- /dev/null +++ b/docs/reference/coef.mr.mash.rss.html @@ -0,0 +1,98 @@ + +Extract coefficients from mr.mash.rss fit. — coef.mr.mash.rss • mr.mash.alpha + + +
    +
    + + + +
    +
    + + +
    +

    Extract coefficients from mr.mash.rss fit.

    +
    + +
    +
    # S3 method for mr.mash.rss
    +coef(object, ...)
    +
    + +
    +

    Arguments

    +
    object
    +

    a mr.mash fit.

    + + +
    ...
    +

    Other arguments (not used).

    + +
    +
    +

    Value

    + + +

    p x r or (p+1) x r matrix of coefficients, + depending on whether an intercept was computed.

    +
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/compute_canonical_covs.html b/docs/reference/compute_canonical_covs.html index 4fda6db..0844c5d 100644 --- a/docs/reference/compute_canonical_covs.html +++ b/docs/reference/compute_canonical_covs.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -50,27 +50,34 @@

    Compute canonical covariance matrices.

    -
    compute_canonical_covs(
    -  r,
    -  singletons = TRUE,
    -  hetgrid = c(0, 0.25, 0.5, 0.75, 1)
    -)
    +
    compute_canonical_covs(
    +  r,
    +  singletons = TRUE,
    +  hetgrid = c(0, 0.25, 0.5, 0.75, 1)
    +)

    Arguments

    r

    number of responses.

    + +
    singletons

    if TRUE, the response-specific effect matrices will be included.

    + +
    hetgrid

    scalar or numeric vector of positive correlation [0, 1] of the effects across responses.

    +

    Value

    -

    A list containing the canonical covariance matrices.

    + + +

    A list containing the canonical covariance matrices.

    @@ -81,11 +88,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/compute_data_driven_covs.html b/docs/reference/compute_data_driven_covs.html index 827fe48..3b97a2e 100644 --- a/docs/reference/compute_data_driven_covs.html +++ b/docs/reference/compute_data_driven_covs.html @@ -18,7 +18,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -52,14 +52,14 @@

    Compute data-driven covariance matrices.

    -
    compute_data_driven_covs(
    -  sumstats,
    -  subset_thresh = NULL,
    -  n_pcs = 3,
    -  flash_factors = c("default", "nonneg"),
    -  flash_remove_singleton = FALSE,
    -  Gamma = diag(ncol(sumstats$Bhat))
    -)
    +
    compute_data_driven_covs(
    +  sumstats,
    +  subset_thresh = NULL,
    +  n_pcs = 3,
    +  flash_factors = c("default", "nonneg"),
    +  flash_remove_singleton = FALSE,
    +  Gamma = diag(ncol(sumstats$Bhat))
    +)
    @@ -67,23 +67,36 @@

    Arguments

    sumstats

    a list with two elements. 1 - Bhat, a numeric vector of regression coefficients. 2 - Shat, a numeric vector of standard errors for the regression coefficients.

    + +
    subset_thresh

    scalar indicating the threshold for selecting the effects to be used for computing the covariance matrices, based on the local false sign rate (lfsr) from a response-by-response ash analysis.

    + +
    n_pcs

    integer indicating the number of principal components to be selected.

    + +
    flash_factors

    factors to use: "default" uses the flashr default function to initialize factors (currently udv_si); "nonneg" implements a non-negative constraint on the factors.

    + +
    flash_remove_singleton

    whether or not factors corresponding to singleton matrices should be removed from output.

    + +
    Gamma

    an r x r correlation matrix for the residuals; must be positive definite.

    +

    Value

    -

    A list containing the (de-noised) data-driven covariance matrices.

    + + +

    A list containing the (de-noised) data-driven covariance matrices.
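    A minimal sketch, assuming univariate summary statistics computed with compute_univariate_sumstats(); the subset_thresh value is illustrative only:
    univ_sumstats <- compute_univariate_sumstats(X, Y, standardize = TRUE,
                                                 standardize.response = FALSE)
    # Keep effects with lfsr below an illustrative threshold, extract 3
    # principal components, and use non-negative flash factors.
    S0_dd <- compute_data_driven_covs(sumstats = univ_sumstats,
                                      subset_thresh = 0.01, n_pcs = 3,
                                      flash_factors = "nonneg")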

    @@ -94,11 +107,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/compute_univariate_sumstats.html b/docs/reference/compute_univariate_sumstats.html index 75dd281..7206692 100644 --- a/docs/reference/compute_univariate_sumstats.html +++ b/docs/reference/compute_univariate_sumstats.html @@ -18,7 +18,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -52,37 +52,51 @@

    Compute summary statistics from univariate simple linear regression.

    -
    compute_univariate_sumstats(
    -  X,
    -  Y,
    -  standardize = FALSE,
    -  standardize.response = FALSE,
    -  mc.cores = 1
    -)
    +
    compute_univariate_sumstats(
    +  X,
    +  Y,
    +  standardize = FALSE,
    +  standardize.response = FALSE,
    +  mc.cores = 1
    +)

    Arguments

    X

    n x p matrix of covariates.

    + +
    Y

    n x r matrix of responses.

    + +
    standardize

    If TRUE, X is "standardized" using the sample means and sample standard deviations.

    + +
    standardize.response

    If TRUE, Y is "standardized" using the sample means and sample standard deviations.

    + +
    mc.cores

    Number of cores to use. Parallelization is done over responses.

    +

    Value

    -

    A list with following elements:

    + + +

    A list with the following elements:

    Bhat

    p x r matrix of the regression coefficients.

    + +
    Shat

    p x r matrix of the standard errors for the regression coefficients.

    +
    @@ -93,11 +107,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/expand_covs.html b/docs/reference/expand_covs.html index 0c879b5..8577dbf 100644 --- a/docs/reference/expand_covs.html +++ b/docs/reference/expand_covs.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -50,21 +50,28 @@

    Expand covariance matrices by a grid of variances for use in mr.mash
    -
    expand_covs(mats, grid, zeromat = TRUE)
    +
    expand_covs(mats, grid, zeromat = TRUE)

    Arguments

    mats

    a list of covariance matrices.

    + +
    grid

    scalar or numeric vector of variances of the effects.

    + +
    zeromat

    if TRUE, the no-effect matrix will be included.

    +

    Value

    -

    A list containing the scaled covariance matrices.

    + + +

    A list containing the scaled covariance matrices.
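    A minimal sketch of expanding canonical covariance matrices over a variance grid, mirroring the mr.mash example elsewhere in these docs (grid is assumed to come from autoselect.mixsd):
    S0 <- compute_canonical_covs(ncol(Y), singletons = TRUE,
                                 hetgrid = c(0, 0.25, 0.5, 0.75, 1))
    # Scale each canonical matrix by every value in the grid and include
    # the zero (no-effect) matrix.
    S0 <- expand_covs(S0, grid, zeromat = TRUE)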

    @@ -75,11 +82,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/index.html b/docs/reference/index.html index 2b45a0c..06a9c40 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -55,6 +55,10 @@

    All functions coef(<mr.mash>)

    Extract coefficients from mr.mash fit.

    + +

    coef(<mr.mash.rss>)

    + +

    Extract coefficients from mr.mash.rss fit.

    compute_canonical_covs()

    @@ -75,10 +79,19 @@

    All functions mr.mash()

    Multiple Regression with Multivariate Adaptive Shrinkage.

    + +

    mr.mash.rss()

    + +

    Multiple Regression with Multivariate Adaptive Shrinkage + from summary data.

    predict(<mr.mash>)

    Predict future observations from mr.mash fit.

    + +

    predict(<mr.mash.rss>)

    + +

    Predict future observations from mr.mash.rss fit.

    simulate_mr_mash_data()

    @@ -92,11 +105,11 @@

    All functions
    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/mr.mash.html b/docs/reference/mr.mash.html index 3357643..48cfa2b 100644 --- a/docs/reference/mr.mash.html +++ b/docs/reference/mr.mash.html @@ -18,7 +18,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -52,117 +52,168 @@

    Multiple Regression with Multivariate Adaptive Shrinkage.

    -
    mr.mash(
    -  X,
    -  Y,
    -  S0,
    -  w0 = rep(1/(length(S0)), length(S0)),
    -  V = NULL,
    -  mu1_init = matrix(0, nrow = ncol(X), ncol = ncol(Y)),
    -  tol = 0.0001,
    -  convergence_criterion = c("mu1", "ELBO"),
    -  max_iter = 5000,
    -  update_w0 = TRUE,
    -  update_w0_method = "EM",
    -  w0_threshold = 0,
    -  compute_ELBO = TRUE,
    -  standardize = TRUE,
    -  verbose = TRUE,
    -  update_V = FALSE,
    -  update_V_method = c("full", "diagonal"),
    -  version = c("Rcpp", "R"),
    -  e = 1e-08,
    -  ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF"),
    -  nthreads = as.integer(NA)
    -)
    +
    mr.mash(
    +  X,
    +  Y,
    +  S0,
    +  w0 = rep(1/(length(S0)), length(S0)),
    +  V = NULL,
    +  mu1_init = matrix(0, nrow = ncol(X), ncol = ncol(Y)),
    +  tol = 1e-04,
    +  convergence_criterion = c("mu1", "ELBO"),
    +  max_iter = 5000,
    +  update_w0 = TRUE,
    +  update_w0_method = "EM",
    +  w0_threshold = 0,
    +  compute_ELBO = TRUE,
    +  standardize = TRUE,
    +  verbose = TRUE,
    +  update_V = FALSE,
    +  update_V_method = c("full", "diagonal"),
    +  version = c("Rcpp", "R"),
    +  e = 1e-08,
    +  ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF", "random"),
    +  nthreads = as.integer(NA)
    +)

    Arguments

    X

    n x p matrix of covariates.

    + +
    Y

    n x r matrix of responses.

    + +
    S0

    List of length K containing the desired r x r prior covariance matrices on the regression coefficients.

    + +
    w0

    K-vector with prior mixture weights, each associated with the respective covariance matrix in S0.

    + +
    V

    r x r residual covariance matrix.

    + +
    mu1_init

    p x r matrix of initial estimates of the posterior mean regression coefficients. These should be on the same scale as the X provided. If standardize=TRUE, mu1_init will be scaled appropriately after standardizing X.

    + +
    tol

    Convergence tolerance.

    + +
    convergence_criterion

    Criterion to use for convergence check.

    + +
    max_iter

    Maximum number of iterations for the optimization algorithm.

    + +
    update_w0

    If TRUE, prior weights are updated.

    + +
    update_w0_method

    Method to update prior weights. Only EM is currently supported.

    + +
    w0_threshold

    Drop mixture components with weight less than this value. Components are dropped at each iteration after 15 initial iterations. This is done to prevent dropping some potentially important components prematurely.

    + +
    compute_ELBO

    If TRUE, ELBO is computed.

    + +
    standardize

    If TRUE, X is "standardized" using the sample means and sample standard deviations. Standardizing X allows a faster implementation, but the prior has a different interpretation. Coefficients and covariances are returned on the original scale.

    + +
    verbose

    If TRUE, some information about the algorithm's process is printed at each iteration.

    + +
    update_V

    if TRUE, residual covariance is updated.

    + +
    update_V_method

    Method to update residual covariance. So far, "full" and "diagonal" are supported. If update_V=TRUE and V is not provided by the user, this option will determine how V is computed (and fixed) internally from mu1_init.

    + +
    version

    Whether to use R or C++ code to perform the coordinate ascent updates.

    + +
    e

    A small number to add to the diagonal elements of the prior matrices to improve numerical stability of the updates.

    + +
    ca_update_order

    The order with which coordinates are updated. So far, "consecutive", "decreasing_logBF", -"increasing_logBF" are supported.

    +"increasing_logBF", "random" are supported.

    + +
    nthreads

    Number of RcppParallel threads to use for the updates. When nthreads is NA, the default number of threads is used; see defaultNumThreads. This setting is ignored when version = "R".

    +

    Value

    -

    A mr.mash fit, stored as a list with some or all of the + + +

    A mr.mash fit, stored as a list with some or all of the following elements:

    mu1

    p x r matrix of posterior means for the regression coefficients.

    + +
    S1

    r x r x p array of posterior covariances for the regression coefficients.

    + +
    w1

    p x K matrix of posterior assignment probabilities to the mixture components.

    + +
    V

    r x r residual covariance matrix

    + +
    w0

    K-vector with (updated, if update_w0=TRUE) prior mixture weights, each associated with the respective covariance matrix in S0

    @@ -174,256 +225,101 @@

    Value

    intercept

    r-vector containing posterior mean estimate of the intercept.

    + +
    fitted

    n x r matrix of fitted values.

    + +
    G

    r x r covariance matrix of fitted values.

    + +
    pve

    r-vector of proportion of variance explained by the covariates.

    + +
    ELBO

    Evidence Lower Bound (ELBO) at last iteration.

    + +
    progress

    A data frame including information regarding convergence criteria at each iteration.

    + +
    converged

    TRUE or FALSE, indicating whether the optimization algorithm converged to a solution within the chosen tolerance level.

    + +
    Y

    n x r matrix of responses at last iteration (only relevant when missing values are present in the input Y).

    +

    Examples

    -
    ###Set seed
    -set.seed(123)
    -
    -###Simulate X and Y
    -##Set parameters
    -n  <- 1000
    -p <- 100
    -p_causal <- 20
    -r <- 5
    -
    -###Simulate data
    -out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1,
    -                             B_scale=1, X_cor=0, X_scale=1, V_cor=0)
    -
    -###Split the data in training and test sets
    -Ytrain <- out$Y[-c(1:200), ]
    -Xtrain <- out$X[-c(1:200), ]
    -Ytest <- out$Y[c(1:200), ]
    -Xtest <- out$X[c(1:200), ]
    -
    -###Specify the covariance matrices for the mixture-of-normals prior.
    -univ_sumstats <- compute_univariate_sumstats(Xtrain, Ytrain,
    -                   standardize=TRUE, standardize.response=FALSE)
    -grid <- autoselect.mixsd(univ_sumstats, mult=sqrt(2))^2
    -S0 <- compute_canonical_covs(ncol(Ytrain), singletons=TRUE,
    -                             hetgrid=c(0, 0.25, 0.5, 0.75, 1))
    -S0 <- expand_covs(S0, grid, zeromat=TRUE)
    -
    -###Fit mr.mash
    -fit <- mr.mash(Xtrain, Ytrain, S0, update_V=TRUE)
    -#> Processing the inputs... Done!
    -#> Fitting the optimization algorithm using 4 RcppParallel threads... 
    -#>  iter    mu1_max.diff     ELBO_diff               ELBO
    -#>    1       1.71e+00            Inf      -1.17642529463347200362e+04
    -#>    2       3.16e-01       3.86e+02      -1.13785637059042273904e+04
    -#>    3       7.32e-02       2.38e+01      -1.13547399263873048767e+04
    -#>    4       2.64e-02       2.75e+00      -1.13519874963371403283e+04
    -#>    5       1.63e-02       1.28e+00      -1.13507113883505189733e+04
    -#>    6       1.23e-02       7.76e-01      -1.13499353194769155380e+04
    -#>    7       9.55e-03       5.23e-01      -1.13494121034913623589e+04
    -#>    8       7.63e-03       3.77e-01      -1.13490351448482251726e+04
    -#>    9       6.23e-03       2.85e-01      -1.13487496705359881162e+04
    -#>   10       5.18e-03       2.25e-01      -1.13485247759129561018e+04
    -#>   11       4.38e-03       1.83e-01      -1.13483418498666960659e+04
    -#>   12       3.76e-03       1.53e-01      -1.13481890973898334778e+04
    -#>   13       3.27e-03       1.30e-01      -1.13480587225331200898e+04
    -#>   14       2.87e-03       1.13e-01      -1.13479453880556593504e+04
    -#>   15       2.54e-03       1.00e-01      -1.13478453302035195520e+04
    -#>   16       2.27e-03       8.95e-02      -1.13477558277724110667e+04
    -#>   17       2.05e-03       8.10e-02      -1.13476748714051100251e+04
    -#>   18       1.86e-03       7.39e-02      -1.13476009505299771263e+04
    -#>   19       1.70e-03       6.80e-02      -1.13475329118713616481e+04
    -#>   20       1.56e-03       6.30e-02      -1.13474698629157774121e+04
    -#>   21       1.44e-03       5.88e-02      -1.13474111044581350143e+04
    -#>   22       1.34e-03       5.50e-02      -1.13473560824759751995e+04
    -#>   23       1.24e-03       5.17e-02      -1.13473043531740622711e+04
    -#>   24       1.16e-03       4.88e-02      -1.13472555572093006049e+04
    -#>   25       1.15e-03       4.62e-02      -1.13472094004458704148e+04
    -#>   26       1.14e-03       4.38e-02      -1.13471656394390265632e+04
    -#>   27       1.13e-03       4.16e-02      -1.13471240703959247185e+04
    -#>   28       1.11e-03       3.95e-02      -1.13470845207260881580e+04
    -#>   29       1.09e-03       3.77e-02      -1.13470468425409617339e+04
    -#>   30       1.06e-03       3.59e-02      -1.13470109076327444200e+04
    -#>   31       1.03e-03       3.43e-02      -1.13469766035832471971e+04
    -#>   32       9.99e-04       3.28e-02      -1.13469438307399304904e+04
    -#>   33       9.66e-04       3.13e-02      -1.13469124998599418177e+04
    -#>   34       9.32e-04       3.00e-02      -1.13468825302697750885e+04
    -#>   35       8.97e-04       2.87e-02      -1.13468538484235305077e+04
    -#>   36       8.62e-04       2.75e-02      -1.13468263867695677618e+04
    -#>   37       8.45e-04       2.63e-02      -1.13468000828559906950e+04
    -#>   38       8.27e-04       2.52e-02      -1.13467748786209594982e+04
    -#>   39       8.08e-04       2.42e-02      -1.13467507198262501333e+04
    -#>   40       7.88e-04       2.32e-02      -1.13467275556018321367e+04
    -#>   41       7.69e-04       2.22e-02      -1.13467053380766210466e+04
    -#>   42       7.48e-04       2.13e-02      -1.13466840220762533136e+04
    -#>   43       7.28e-04       2.05e-02      -1.13466635648730698449e+04
    -#>   44       7.08e-04       1.96e-02      -1.13466439259770686476e+04
    -#>   45       6.87e-04       1.89e-02      -1.13466250669590608595e+04
    -#>   46       6.67e-04       1.81e-02      -1.13466069512992762611e+04
    -#>   47       6.46e-04       1.74e-02      -1.13465895442564869882e+04
    -#>   48       6.26e-04       1.67e-02      -1.13465728127534348459e+04
    -#>   49       6.06e-04       1.61e-02      -1.13465567252757282404e+04
    -#>   50       5.86e-04       1.55e-02      -1.13465412517817003391e+04
    -#>   51       5.67e-04       1.49e-02      -1.13465263636214604048e+04
    -#>   52       5.48e-04       1.43e-02      -1.13465120334636267216e+04
    -#>   53       5.29e-04       1.38e-02      -1.13464982352285896923e+04
    -#>   54       5.10e-04       1.33e-02      -1.13464849440273919754e+04
    -#>   55       4.92e-04       1.28e-02      -1.13464721361054143927e+04
    -#>   56       4.74e-04       1.23e-02      -1.13464597887902673392e+04
    -#>   57       4.57e-04       1.19e-02      -1.13464478804433110781e+04
    -#>   58       4.40e-04       1.15e-02      -1.13464363904144338449e+04
    -#>   59       4.23e-04       1.11e-02      -1.13464252989995929966e+04
    -#>   60       4.07e-04       1.07e-02      -1.13464145874008609098e+04
    -#>   61       3.91e-04       1.03e-02      -1.13464042376887045975e+04
    -#>   62       3.76e-04       1.00e-02      -1.13463942327661479794e+04
    -#>   63       3.61e-04       9.68e-03      -1.13463845563347349525e+04
    -#>   64       3.47e-04       9.36e-03      -1.13463751928620313265e+04
    -#>   65       3.32e-04       9.07e-03      -1.13463661275504564401e+04
    -#>   66       3.19e-04       8.78e-03      -1.13463573463074662868e+04
    -#>   67       3.06e-04       8.51e-03      -1.13463488357167952927e+04
    -#>   68       2.93e-04       8.25e-03      -1.13463405830108058581e+04
    -#>   69       2.80e-04       8.01e-03      -1.13463325760437292047e+04
    -#>   70       2.68e-04       7.77e-03      -1.13463248032658666489e+04
    -#>   71       2.57e-04       7.55e-03      -1.13463172536985803163e+04
    -#>   72       2.45e-04       7.34e-03      -1.13463099169100987638e+04
    -#>   73       2.34e-04       7.13e-03      -1.13463027829920665681e+04
    -#>   74       2.24e-04       6.94e-03      -1.13462958425368451572e+04
    -#>   75       2.14e-04       6.76e-03      -1.13462890866154957621e+04
    -#>   76       2.04e-04       6.58e-03      -1.13462825067564572237e+04
    -#>   77       1.95e-04       6.41e-03      -1.13462760949249059195e+04
    -#>   78       1.86e-04       6.25e-03      -1.13462698435027650703e+04
    -#>   79       1.80e-04       6.10e-03      -1.13462637452693834348e+04
    -#>   80       1.74e-04       5.95e-03      -1.13462577933828670211e+04
    -#>   81       1.69e-04       5.81e-03      -1.13462519813620401692e+04
    -#>   82       1.65e-04       5.68e-03      -1.13462463030690068990e+04
    -#>   83       1.60e-04       5.55e-03      -1.13462407526924489503e+04
    -#>   84       1.55e-04       5.43e-03      -1.13462353247314367763e+04
    -#>   85       1.51e-04       5.31e-03      -1.13462300139798917371e+04
    -#>   86       1.47e-04       5.20e-03      -1.13462248155116994894e+04
    -#>   87       1.43e-04       5.09e-03      -1.13462197246663490660e+04
    -#>   88       1.38e-04       4.99e-03      -1.13462147370352431608e+04
    -#>   89       1.35e-04       4.89e-03      -1.13462098484485013614e+04
    -#>   90       1.31e-04       4.79e-03      -1.13462050549624327687e+04
    -#>   91       1.27e-04       4.70e-03      -1.13462003528474579070e+04
    -#>   92       1.23e-04       4.61e-03      -1.13461957385766636435e+04
    -#>   93       1.20e-04       4.53e-03      -1.13461912088148419571e+04
    -#>   94       1.17e-04       4.45e-03      -1.13461867604080362071e+04
    -#>   95       1.13e-04       4.37e-03      -1.13461823903736094508e+04
    -#>   96       1.10e-04       4.29e-03      -1.13461780958908111643e+04
    -#>   97       1.13e-04       4.22e-03      -1.13461738742917714262e+04
    -#>   98       1.17e-04       4.15e-03      -1.13461697230529589433e+04
    -#>   99       1.20e-04       4.08e-03      -1.13461656397870901856e+04
    -#>  100       1.23e-04       4.02e-03      -1.13461616222354532511e+04
    -#>  101       1.25e-04       3.95e-03      -1.13461576682606155373e+04
    -#>  102       1.28e-04       3.89e-03      -1.13461537758395334095e+04
    -#>  103       1.30e-04       3.83e-03      -1.13461499430570183904e+04
    -#>  104       1.32e-04       3.77e-03      -1.13461461680995817005e+04
    -#>  105       1.34e-04       3.72e-03      -1.13461424492495752929e+04
    -#>  106       1.36e-04       3.66e-03      -1.13461387848796657636e+04
    -#>  107       1.38e-04       3.61e-03      -1.13461351734476611455e+04
    -#>  108       1.40e-04       3.56e-03      -1.13461316134915432485e+04
    -#>  109       1.41e-04       3.51e-03      -1.13461281036248619785e+04
    -#>  110       1.43e-04       3.46e-03      -1.13461246425323170115e+04
    -#>  111       1.44e-04       3.41e-03      -1.13461212289656486973e+04
    -#>  112       1.45e-04       3.37e-03      -1.13461178617397163180e+04
    -#>  113       1.46e-04       3.32e-03      -1.13461145397288491949e+04
    -#>  114       1.47e-04       3.28e-03      -1.13461112618633414968e+04
    -#>  115       1.48e-04       3.23e-03      -1.13461080271262308088e+04
    -#>  116       1.48e-04       3.19e-03      -1.13461048345501912991e+04
    -#>  117       1.49e-04       3.15e-03      -1.13461016832146342495e+04
    -#>  118       1.50e-04       3.11e-03      -1.13460985722429759335e+04
    -#>  119       1.50e-04       3.07e-03      -1.13460955008001055830e+04
    -#>  120       1.50e-04       3.03e-03      -1.13460924680898842780e+04
    -#>  121       1.51e-04       2.99e-03      -1.13460894733529294172e+04
    -#>  122       1.51e-04       2.96e-03      -1.13460865158644264739e+04
    -#>  123       1.51e-04       2.92e-03      -1.13460835949321244698e+04
    -#>  124       1.51e-04       2.89e-03      -1.13460807098944133031e+04
    -#>  125       1.51e-04       2.85e-03      -1.13460778601185193111e+04
    -#>  126       1.51e-04       2.82e-03      -1.13460750449988463515e+04
    -#>  127       1.50e-04       2.78e-03      -1.13460722639553459885e+04
    -#>  128       1.50e-04       2.75e-03      -1.13460695164320277399e+04
    -#>  129       1.50e-04       2.71e-03      -1.13460668018955493608e+04
    -#>  130       1.49e-04       2.68e-03      -1.13460641198338671529e+04
    -#>  131       1.49e-04       2.65e-03      -1.13460614697549990524e+04
    -#>  132       1.49e-04       2.62e-03      -1.13460588511858259153e+04
    -#>  133       1.48e-04       2.59e-03      -1.13460562636709782964e+04
    -#>  134       1.47e-04       2.56e-03      -1.13460537067717923492e+04
    -#>  135       1.47e-04       2.53e-03      -1.13460511800652911916e+04
    -#>  136       1.46e-04       2.50e-03      -1.13460486831433081534e+04
    -#>  137       1.45e-04       2.47e-03      -1.13460462156115299877e+04
    -#>  138       1.45e-04       2.44e-03      -1.13460437770887328952e+04
    -#>  139       1.44e-04       2.41e-03      -1.13460413672059548844e+04
    -#>  140       1.43e-04       2.38e-03      -1.13460389856057845464e+04
    -#>  141       1.42e-04       2.35e-03      -1.13460366319416516490e+04
    -#>  142       1.41e-04       2.33e-03      -1.13460343058771777578e+04
    -#>  143       1.40e-04       2.30e-03      -1.13460320070855414087e+04
    -#>  144       1.39e-04       2.27e-03      -1.13460297352489287732e+04
    -#>  145       1.38e-04       2.25e-03      -1.13460274900579624955e+04
    -#>  146       1.37e-04       2.22e-03      -1.13460252712111705478e+04
    -#>  147       1.36e-04       2.19e-03      -1.13460230784145296639e+04
    -#>  148       1.35e-04       2.17e-03      -1.13460209113809833070e+04
    -#>  149       1.34e-04       2.14e-03      -1.13460187698300251213e+04
    -#>  150       1.33e-04       2.12e-03      -1.13460166534872714692e+04
    -#>  151       1.32e-04       2.09e-03      -1.13460145620840958145e+04
    -#>  152       1.31e-04       2.07e-03      -1.13460124953572612867e+04
    -#>  153       1.30e-04       2.04e-03      -1.13460104530485768919e+04
    -#>  154       1.29e-04       2.02e-03      -1.13460084349045864656e+04
    -#>  155       1.28e-04       1.99e-03      -1.13460064406762630824e+04
    -#>  156       1.27e-04       1.97e-03      -1.13460044701187216560e+04
    -#>  157       1.25e-04       1.95e-03      -1.13460025229909697373e+04
    -#>  158       1.24e-04       1.92e-03      -1.13460005990556564939e+04
    -#>  159       1.23e-04       1.90e-03      -1.13459986980788125948e+04
    -#>  160       1.22e-04       1.88e-03      -1.13459968198296628543e+04
    -#>  161       1.21e-04       1.86e-03      -1.13459949640804261435e+04
    -#>  162       1.19e-04       1.83e-03      -1.13459931306060934730e+04
    -#>  163       1.18e-04       1.81e-03      -1.13459913191842588276e+04
    -#>  164       1.17e-04       1.79e-03      -1.13459895295949809224e+04
    -#>  165       1.16e-04       1.77e-03      -1.13459877616205958475e+04
    -#>  166       1.14e-04       1.75e-03      -1.13459860150455879193e+04
    -#>  167       1.13e-04       1.73e-03      -1.13459842896564441617e+04
    -#>  168       1.12e-04       1.70e-03      -1.13459825852415324334e+04
    -#>  169       1.11e-04       1.68e-03      -1.13459809015910159360e+04
    -#>  170       1.09e-04       1.66e-03      -1.13459792384966785903e+04
    -#>  171       1.08e-04       1.64e-03      -1.13459775957519086660e+04
    -#>  172       1.07e-04       1.62e-03      -1.13459759731515332533e+04
    -#>  173       1.06e-04       1.60e-03      -1.13459743704917909781e+04
    -#>  174       1.05e-04       1.58e-03      -1.13459727875702319579e+04
    -#>  175       1.03e-04       1.56e-03      -1.13459712241856323089e+04
    -#>  176       1.02e-04       1.54e-03      -1.13459696801379523095e+04
    -#>  177       1.01e-04       1.52e-03      -1.13459681552283000201e+04
    -#>  178       9.98e-05       1.51e-03      -1.13459666492588166875e+04
    -#> Done!
    -#> Processing the outputs... Done!
    -#> mr.mash successfully executed in 0.2228629 minutes!
    -
    -# Compare the "fitted" values of Y against the true Y in the training set.
    -plot(fit$fitted,Ytrain,pch = 20,col = "darkblue",xlab = "true",
    -     ylab = "fitted")
    -abline(a = 0,b = 1,col = "magenta",lty = "dotted")
    -
    -
    -# Predict the multivariate outcomes in the test set using the fitted model.
    -Ytest_est <- predict(fit,Xtest)
    -plot(Ytest_est,Ytest,pch = 20,col = "darkblue",xlab = "true",
    -     ylab = "predicted")
    -abline(a = 0,b = 1,col = "magenta",lty = "dotted")
    -
    -
    +    
    ###Set seed
    +set.seed(123)
    +
    +###Simulate X and Y
    +##Set parameters
    +n  <- 1000
    +p <- 100
    +p_causal <- 20
    +r <- 5
    +
    +###Simulate data
    +out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1,
    +                             B_scale=1, X_cor=0, X_scale=1, V_cor=0)
    +#> Error in simulate_mr_mash_data(n, p, p_causal, r, pve = 0.5, B_cor = 1,     B_scale = 1, X_cor = 0, X_scale = 1, V_cor = 0): seed argument must be provided.
    +
    +###Split the data in training and test sets
    +Ytrain <- out$Y[-c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +Xtrain <- out$X[-c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +Ytest <- out$Y[c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +Xtest <- out$X[c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +
    +###Specify the covariance matrices for the mixture-of-normals prior.
    +univ_sumstats <- compute_univariate_sumstats(Xtrain, Ytrain,
    +                   standardize=TRUE, standardize.response=FALSE)
    +#> Error in ncol(Y): object 'Ytrain' not found
    +grid <- autoselect.mixsd(univ_sumstats, mult=sqrt(2))^2
    +#> Error in autoselect.mixsd(univ_sumstats, mult = sqrt(2)): object 'univ_sumstats' not found
    +S0 <- compute_canonical_covs(ncol(Ytrain), singletons=TRUE,
    +                             hetgrid=c(0, 0.25, 0.5, 0.75, 1))
    +#> Error in ncol(Ytrain): object 'Ytrain' not found
    +S0 <- expand_covs(S0, grid, zeromat=TRUE)
    +#> Error in ncol(mats[[1]]): object 'S0' not found
    +
    +###Fit mr.mash
    +fit <- mr.mash(Xtrain, Ytrain, S0, update_V=TRUE)
    +#> Processing the inputs... 
    +#> Error in mr.mash(Xtrain, Ytrain, S0, update_V = TRUE): object 'Ytrain' not found
    +
    +# Compare the "fitted" values of Y against the true Y in the training set.
    +plot(fit$fitted,Ytrain,pch = 20,col = "darkblue",xlab = "true",
    +     ylab = "fitted")
    +#> Error in plot(fit$fitted, Ytrain, pch = 20, col = "darkblue", xlab = "true",     ylab = "fitted"): object 'fit' not found
    +abline(a = 0,b = 1,col = "magenta",lty = "dotted")
    +#> Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
    +
    +# Predict the multivariate outcomes in the test set using the fitted model.
    +Ytest_est <- predict(fit,Xtest)
    +#> Error in predict(fit, Xtest): object 'fit' not found
    +plot(Ytest_est,Ytest,pch = 20,col = "darkblue",xlab = "true",
    +     ylab = "predicted")
    +#> Error in plot(Ytest_est, Ytest, pch = 20, col = "darkblue", xlab = "true",     ylab = "predicted"): object 'Ytest_est' not found
    +abline(a = 0,b = 1,col = "magenta",lty = "dotted")
    +#> Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
    +
     
    @@ -434,11 +330,11 @@

    Examples

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/mr.mash.rss.html b/docs/reference/mr.mash.rss.html new file mode 100644 index 0000000..129b924 --- /dev/null +++ b/docs/reference/mr.mash.rss.html @@ -0,0 +1,384 @@ + +Multiple Regression with Multivariate Adaptive Shrinkage + from summary data. — mr.mash.rss • mr.mash.alpha + + +
    +
    + + + +
    +
    + + +
    +

    Performs multivariate multiple regression with + mixture-of-normals prior.

    +
    + +
    +
    mr.mash.rss(
    +  Bhat,
    +  Shat,
    +  Z,
    +  R,
    +  covY,
    +  n,
    +  S0,
    +  w0 = rep(1/(length(S0)), length(S0)),
    +  V = NULL,
    +  mu1_init = NULL,
    +  tol = 1e-04,
    +  convergence_criterion = c("mu1", "ELBO"),
    +  max_iter = 5000,
    +  update_w0 = TRUE,
    +  update_w0_method = "EM",
    +  w0_threshold = 0,
    +  compute_ELBO = TRUE,
    +  standardize = TRUE,
    +  verbose = TRUE,
    +  update_V = FALSE,
    +  update_V_method = c("full", "diagonal"),
    +  version = c("Rcpp", "R"),
    +  e = 1e-08,
    +  ca_update_order = c("consecutive", "decreasing_logBF", "increasing_logBF", "random"),
    +  X_colmeans = NULL,
    +  Y_colmeans = NULL,
    +  check_R = TRUE,
    +  R_tol = 1e-08,
    +  nthreads = as.integer(NA)
    +)
    +
    + +
    +

    Arguments

    +
    Bhat
    +

    p x r matrix of regression coefficients from univariate +simple linear regression.

    + + +
    Shat
    +

    p x r matrix of standard errors of the regression coefficients +from univariate simple linear regression.

    + + +
    Z
    +

    p x r matrix of Z-scores from univariate +simple linear regression.

    + + +
    R
    +

    p x p correlation matrix among the variables.

    + + +
    covY
    +

    r x r covariance matrix across responses.

    + + +
    n
    +

    scalar indicating the sample size.

    + + +
    S0
    +

    List of length K containing the desired r x r prior +covariance matrices on the regression coefficients.

    + + +
    w0
    +

    K-vector with prior mixture weights, each associated with +the respective covariance matrix in S0.

    + + +
    V
    +

    r x r residual covariance matrix.

    + + +
    mu1_init
    +

    p x r matrix of initial estimates of the posterior +mean regression coefficients. These should be on the same scale as +the X provided. If standardize=TRUE, mu1_init will be scaled +appropriately after standardizing X.

    + + +
    tol
    +

    Convergence tolerance.

    + + +
    convergence_criterion
    +

    Criterion to use for convergence check.

    + + +
    max_iter
    +

    Maximum number of iterations for the optimization +algorithm.

    + + +
    update_w0
    +

    If TRUE, prior weights are updated.

    + + +
    update_w0_method
    +

    Method to update prior weights. Only EM is +currently supported.

    + + +
    w0_threshold
    +

Drop mixture components with weight less than this value. +Components are dropped at each iteration after 15 initial iterations. +This is done to prevent dropping some potentially important +components prematurely.

    + + +
    compute_ELBO
    +

    If TRUE, ELBO is computed.

    + + +
    standardize
    +

    If TRUE, X is "standardized" using the +sample means and sample standard deviations. Standardizing X +allows a faster implementation, but the prior has a different +interpretation. Coefficients and covariances are returned on the +original scale.

    + + +
    verbose
    +

If TRUE, some information about the +algorithm's progress is printed at each iteration.

    + + +
    update_V
    +

    if TRUE, residual covariance is updated.

    + + +
    update_V_method
    +

    Method to update residual covariance. So far, +"full" and "diagonal" are supported. If update_V=TRUE and V +is not provided by the user, this option will determine how V is +computed (and fixed) internally from mu1_init.

    + + +
    version
    +

    Whether to use R or C++ code to perform the +coordinate ascent updates.

    + + +
    e
    +

    A small number to add to the diagonal elements of the +prior matrices to improve numerical stability of the updates.

    + + +
    ca_update_order
    +

The order in which coordinates are +updated. So far, "consecutive", "decreasing_logBF", +"increasing_logBF", and "random" are supported.

    + + +
    X_colmeans
    +

    a p-vector of variable means.

    + + +
    Y_colmeans
    +

an r-vector of response means.

    + + +
    check_R
    +

    If TRUE, R is checked to be positive semidefinite.

    + + +
    R_tol
    +

Tolerance used to declare positive semi-definiteness of R.

    + + +
    nthreads
    +

    Number of RcppParallel threads to use for the +updates. When nthreads is NA, the default number of +threads is used; see +defaultNumThreads. This setting is +ignored when version = "R".

    + +
    +
    +
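To make the relationship between these inputs concrete, here is a rough sketch of how they might be assembled from individual-level training data. It assumes matrices Xtrain and Ytrain are available and reuses the package helper compute_univariate_sumstats shown in the example below; the Z-score shortcut is the usual identity, not package code:

univ_sumstats <- compute_univariate_sumstats(Xtrain, Ytrain,
                   standardize=TRUE, standardize.response=FALSE)
Bhat <- univ_sumstats$Bhat   # p x r univariate effect estimates
Shat <- univ_sumstats$Shat   # p x r standard errors
Z    <- Bhat/Shat            # p x r Z-scores, if supplied instead of Bhat and Shat
R    <- cor(Xtrain)          # p x p correlation matrix among the variables
covY <- cov(Ytrain)          # r x r covariance across responses
n    <- nrow(Ytrain)         # sample size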

    Value

    + + +

    A mr.mash.rss fit, stored as a list with some or all of the +following elements:

    +
    mu1
    +

p x r matrix of posterior means for the regression + coefficients.

    + + +
    S1
    +

r x r x p array of posterior covariances for the + regression coefficients.

    + + +
    w1
    +

    p x K matrix of posterior assignment probabilities to the + mixture components.

    + + +
    V
    +

    r x r residual covariance matrix

    + + +
    w0
    +

K-vector with (updated, if update_w0=TRUE) prior mixture weights, each associated with + the respective covariance matrix in S0.

    +


    +
    S0
    +

r x r x K array of prior covariance matrices + on the regression coefficients.

    +


    +
    intercept
    +

    r-vector containing posterior mean estimate of the + intercept, if X_colmeans and Y_colmeans are provided. + Otherwise, NA is output.

    + + +
    ELBO
    +

    Evidence Lower Bound (ELBO) at last iteration.

    + + +
    progress
    +

    A data frame including information regarding + convergence criteria at each iteration.

    + + +
    converged
    +

    TRUE or FALSE, indicating whether + the optimization algorithm converged to a solution within the chosen tolerance + level.

    + + +
    Y
    +

    n x r matrix of responses at last iteration (only relevant when missing values + are present in the input Y).

    + +
    + +
    +
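As a quick sketch of how these elements might be inspected after fitting, assuming a fit object named fit as produced in the example below:

dim(fit$mu1)        # p x r posterior mean coefficients
fit$w0              # updated prior mixture weights
fit$V               # residual covariance (updated if update_V=TRUE)
fit$converged       # whether the algorithm reached the chosen tolerance
tail(fit$progress)  # convergence diagnostics for the last iterations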

    Examples

    +
    ###Set seed
    +set.seed(123)
    +
    +###Simulate X and Y
    +##Set parameters
    +n  <- 1000
    +p <- 100
    +p_causal <- 20
    +r <- 5
    +
    +###Simulate data
    +out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1,
    +                             B_scale=1, X_cor=0, X_scale=1, V_cor=0)
    +#> Error in simulate_mr_mash_data(n, p, p_causal, r, pve = 0.5, B_cor = 1,     B_scale = 1, X_cor = 0, X_scale = 1, V_cor = 0): seed argument must be provided.
    +
    +###Split the data in training and test sets
    +Ytrain <- out$Y[-c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +Xtrain <- out$X[-c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +Ytest <- out$Y[c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +Xtest <- out$X[c(1:200), ]
    +#> Error in eval(expr, envir, enclos): object 'out' not found
    +
    +###Specify the covariance matrices for the mixture-of-normals prior.
    +univ_sumstats <- compute_univariate_sumstats(Xtrain, Ytrain,
    +                   standardize=TRUE, standardize.response=FALSE)
    +#> Error in ncol(Y): object 'Ytrain' not found
    +grid <- autoselect.mixsd(univ_sumstats, mult=sqrt(2))^2
    +#> Error in autoselect.mixsd(univ_sumstats, mult = sqrt(2)): object 'univ_sumstats' not found
    +S0 <- compute_canonical_covs(ncol(Ytrain), singletons=TRUE,
    +                             hetgrid=c(0, 0.25, 0.5, 0.75, 1))
    +#> Error in ncol(Ytrain): object 'Ytrain' not found
    +S0 <- expand_covs(S0, grid, zeromat=TRUE)
    +#> Error in ncol(mats[[1]]): object 'S0' not found
    +
    +###Fit mr.mash
    +covY <- cov(Ytrain)
    +#> Error in is.data.frame(x): object 'Ytrain' not found
    +corX <- cor(Xtrain)
    +#> Error in is.data.frame(x): object 'Xtrain' not found
    +n_train <- nrow(Ytrain)
    +#> Error in nrow(Ytrain): object 'Ytrain' not found
    +fit <- mr.mash.rss(Bhat=univ_sumstats$Bhat, Shat=univ_sumstats$Shat, S0=S0, 
    +                   covY=covY, R=corX, n=n_train, V=covY, update_V=TRUE,
    +                   X_colmeans=colMeans(Xtrain), Y_colmeans=colMeans(Ytrain))
    +#> Processing the inputs... 
    +#> Error in mr.mash.rss(Bhat = univ_sumstats$Bhat, Shat = univ_sumstats$Shat,     S0 = S0, covY = covY, R = corX, n = n_train, V = covY, update_V = TRUE,     X_colmeans = colMeans(Xtrain), Y_colmeans = colMeans(Ytrain)): object 'univ_sumstats' not found
    +
    +# Predict the multivariate outcomes in the test set using the fitted model.
    +Ytest_est <- predict(fit,Xtest)
    +#> Error in predict(fit, Xtest): object 'fit' not found
    +plot(Ytest_est,Ytest,pch = 20,col = "darkblue",xlab = "true",
    +     ylab = "predicted")
    +#> Error in plot(Ytest_est, Ytest, pch = 20, col = "darkblue", xlab = "true",     ylab = "predicted"): object 'Ytest_est' not found
    +abline(a = 0,b = 1,col = "magenta",lty = "dotted")
    +#> Error in int_abline(a = a, b = b, h = h, v = v, untf = untf, ...): plot.new has not been called yet
    +
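The errors above arise because, in the package version documented here, simulate_mr_mash_data() stopped when no seed was supplied. A possible corrected first step, mirroring the roxygen fix applied in a later patch of this series, would be:

out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1,
                             B_scale=1, X_cor=0, X_scale=1, V_cor=0,
                             seed=123)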
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/predict.mr.mash.html b/docs/reference/predict.mr.mash.html index a1c08cb..6bba585 100644 --- a/docs/reference/predict.mr.mash.html +++ b/docs/reference/predict.mr.mash.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -50,22 +50,29 @@

    Predict future observations from mr.mash fit.

    -
    # S3 method for mr.mash
    -predict(object, newx, ...)
    +
    # S3 method for mr.mash
    +predict(object, newx, ...)

    Arguments

    object

    a mr.mash fit.

    + +
    newx

    a new value for X at which to do predictions.

    + +
    ...

    Additional arguments (not used).

    +

    Value

    -

    Matrix of predicted values.

    + + +

    Matrix of predicted values.

    @@ -76,11 +83,11 @@

    Value

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/reference/predict.mr.mash.rss.html b/docs/reference/predict.mr.mash.rss.html new file mode 100644 index 0000000..99530c5 --- /dev/null +++ b/docs/reference/predict.mr.mash.rss.html @@ -0,0 +1,101 @@ + +Predict future observations from mr.mash.rss fit. — predict.mr.mash.rss • mr.mash.alpha + + +
    +
    + + + +
    +
    + + +
    +

    Predict future observations from mr.mash.rss fit.

    +
    + +
    +
    # S3 method for mr.mash.rss
    +predict(object, newx, ...)
    +
    + +
    +

    Arguments

    +
    object
    +

    a mr.mash.rss fit.

    + + +
    newx
    +

    a new value for X at which to do predictions.

    + + +
    ...
    +

    Additional arguments (not used).

    + +
    +
    +

    Value

    + + +

    Matrix of predicted values.

    +
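A minimal usage sketch, where Xnew is a hypothetical matrix of new predictor values with the same p columns used to compute the summary statistics:

Yhat <- predict(fit, Xnew)  # matrix of predicted responses
dim(Yhat)                   # nrow(Xnew) x r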
    + +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/simulate_mr_mash_data.html b/docs/reference/simulate_mr_mash_data.html index d0c7bfb..6330005 100644 --- a/docs/reference/simulate_mr_mash_data.html +++ b/docs/reference/simulate_mr_mash_data.html @@ -18,7 +18,7 @@ mr.mash.alpha - 0.2-21 + 0.3-12 @@ -52,93 +52,153 @@

    Simulate data to test mr.mash.

    -
    simulate_mr_mash_data(
    -  n,
    -  p,
    -  p_causal,
    -  r,
    -  r_causal = list(1:r),
    -  intercepts = rep(1, r),
    -  pve = 0.2,
    -  B_cor = 1,
    -  B_scale = 1,
    -  w = 1,
    -  X_cor = 0,
    -  X_scale = 1,
    -  V_cor = 0
    -)
    +
    simulate_mr_mash_data(
    +  n,
    +  p,
    +  p_causal,
    +  r,
    +  r_causal = list(1:r),
    +  intercepts = rep(1, r),
    +  pve = 0.2,
    +  B_cor = 1,
    +  B_scale = 1,
    +  w = 1,
    +  X_cor = 0,
    +  X_scale = 1,
    +  V_cor = 0,
    +  seed = NULL,
    +  e = 1e-08
    +)

    Arguments

    n

    scalar indicating the number of samples.

    + +
    p

    scalar indicating the number of variables.

    + +
    p_causal

    scalar indicating the number of causal variables.

    + +
    r

    scalar indicating the number of responses.

    + +
    r_causal

    a list of numeric vectors (one element for each mixture component) indicating in which responses the causal variables have an effect.

    + +
    intercepts

numeric vector of intercepts, one for each response.

    + +
    pve

    per-response proportion of variance explained by the causal variables.

    + +
    B_cor

    scalar or numeric vector (one element for each mixture component) with positive correlation [0, 1] between causal effects.

    + +
    B_scale

scalar or numeric vector (one element for each mixture component) with the diagonal value for Sigma_k.

    + +
    w

    scalar or numeric vector (one element for each mixture component) with mixture proportions associated to each mixture component.

    + +
    X_cor

    scalar indicating the positive correlation [0, 1] between variables.

    + +
    X_scale

    scalar indicating the diagonal value for Gamma.

    + +
    V_cor

scalar indicating the positive correlation [0, 1] between residuals.

    + + +
    seed
    +

seed for random number generation used by Rfast::rmvnorm +and Rfast::Rnorm. However, some computations will also need a general +set.seed() to be reproducible.

    + + +
    e
    +

    A small number to add to the diagonal elements of the +covariance matrices to make them positive definite.

    +

    Value

    -

    A list with some or all of the + + +

    A list with some or all of the following elements:

    X

    n x p matrix of variables.

    + +
    Y

    n x r matrix of responses.

    + +
    B

    p x r matrix of effects.

    + +
    V

    r x r residual covariance matrix among responses.

    + +
    Sigma

    list of r x r covariance matrices among the effects.

    + +
    Gamma

    p x p covariance matrix among the variables.

    + +
    intercepts

r-vector of intercepts, one for each response.

    + +
    causal_responses

    a list of numeric vectors of indexes indicating which responses have causal effects for each mixture component.

    + +
    causal_variables

    p_causal-vector of indexes indicating which variables are causal.

    + +
    causal_vars_to_mixture_comps

    p_causal-vector of indexes indicating from which mixture components each causal effect comes.

    +
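A short sketch of how the returned pieces might be examined, assuming dat holds the output of the example call below:

dim(dat$X)              # n x p matrix of variables
dim(dat$Y)              # n x r matrix of responses
dim(dat$B)              # p x r matrix of effects
dat$causal_variables    # indices of the p_causal causal variables
dat$intercepts          # r-vector of intercepts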

    Examples

    -
    dat <- simulate_mr_mash_data(n=50, p=40, p_causal=20, r=5,
    -                             r_causal=list(1:2, 3:4), intercepts=rep(1, 5),
    -                             pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1),
    -                             w=c(0.5, 0.5), X_cor=0.5, X_scale=1, 
    -                             V_cor=0)
    -                             
    -                             
    +    
    set.seed(1)
    +dat <- simulate_mr_mash_data(n=50, p=40, p_causal=20, r=5,
    +                             r_causal=list(1:2, 3:4), intercepts=rep(1, 5),
    +                             pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1),
    +                             w=c(0.5, 0.5), X_cor=0.5, X_scale=1, 
    +                             V_cor=0, seed=1)
    +                             
    +                             
     
    @@ -149,11 +209,11 @@

    Examples

    -

    Site built with pkgdown 2.0.2.

    +

    Site built with pkgdown 2.0.7.

    diff --git a/docs/sitemap.xml b/docs/sitemap.xml index 7078026..62f13df 100644 --- a/docs/sitemap.xml +++ b/docs/sitemap.xml @@ -18,6 +18,9 @@ /reference/coef.mr.mash.html + + /reference/coef.mr.mash.rss.html + /reference/compute_canonical_covs.html @@ -36,9 +39,15 @@ /reference/mr.mash.html + + /reference/mr.mash.rss.html + /reference/predict.mr.mash.html + + /reference/predict.mr.mash.rss.html + /reference/simulate_mr_mash_data.html From b012f8fde5653f27581e806b3161413ea86f9018 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 17 Apr 2023 14:34:52 -0400 Subject: [PATCH 079/103] Fix bug --- R/mr_mash_rss.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index beb4274..14e71fa 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -263,7 +263,7 @@ mr.mash.rss <- function(Bhat, Shat, Z, R, covY, n, S0, w0=rep(1/(length(S0)), le stop("Elements of w0 must sum to 1.") if(length(S0)!=length(w0)) stop("S0 and w0 must have the same length.") - if(!missing(mu1_init) && !is.matrix(mu1_init)) + if(!is.null(mu1_init) && !is.matrix(mu1_init)) stop("mu1_init must be a matrix.") if(convergence_criterion=="ELBO" && !compute_ELBO) stop("ELBO needs to be computed with convergence_criterion=\"ELBO\".") From 363c4288a7916bb874364796aec52bee12270949 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Mon, 17 Apr 2023 14:35:28 -0400 Subject: [PATCH 080/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 90d8fa0..ce74226 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-12 -Date: 2023-03-31 +Version: 0.3-13 +Date: 2023-04-17 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. 
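The validation fix in patch 079 above replaces missing(mu1_init) with is.null(mu1_init); the likely motivation is that, with a NULL default, an explicitly supplied NULL should be accepted and filled in later, which missing() cannot detect. A small standalone sketch of the intended behavior (hypothetical code, not part of the package):

check_mu1_init <- function(mu1_init = NULL) {
  # Validate only when a non-NULL value is supplied; an explicit NULL is
  # treated the same as leaving the argument out.
  if (!is.null(mu1_init) && !is.matrix(mu1_init))
    stop("mu1_init must be a matrix.")
  invisible(TRUE)
}
check_mu1_init()                  # OK: argument not supplied
check_mu1_init(NULL)              # OK: explicit NULL no longer errors
check_mu1_init(matrix(0, 3, 2))   # OK: a matrix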
From e667df9061a4df9cd48a0692d7ad3bcf7691a452 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 27 Apr 2023 13:31:35 -0400 Subject: [PATCH 081/103] Fix example --- R/mr_mash.R | 3 ++- R/mr_mash_rss.R | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/mr_mash.R b/R/mr_mash.R index 7c503e6..9a99f6c 100644 --- a/R/mr_mash.R +++ b/R/mr_mash.R @@ -125,7 +125,8 @@ #' #' ###Simulate data #' out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, -#' B_scale=1, X_cor=0, X_scale=1, V_cor=0) +#' B_scale=1, X_cor=0, X_scale=1, V_cor=0, +#' seed=123) #' #' ###Split the data in training and test sets #' Ytrain <- out$Y[-c(1:200), ] diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index beb4274..4208634 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -140,7 +140,8 @@ #' #' ###Simulate data #' out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, -#' B_scale=1, X_cor=0, X_scale=1, V_cor=0) +#' B_scale=1, X_cor=0, X_scale=1, V_cor=0, +#' seed=123) #' #' ###Split the data in training and test sets #' Ytrain <- out$Y[-c(1:200), ] From ab6660b4dbb3b318637a4a7b0568e5cf6c86cdf9 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 27 Apr 2023 13:33:10 -0400 Subject: [PATCH 082/103] Run devtools::document() --- man/mr.mash.Rd | 3 ++- man/mr.mash.rss.Rd | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/man/mr.mash.Rd b/man/mr.mash.Rd index 8d6cb59..1ea14bb 100644 --- a/man/mr.mash.Rd +++ b/man/mr.mash.Rd @@ -156,7 +156,8 @@ r <- 5 ###Simulate data out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, - B_scale=1, X_cor=0, X_scale=1, V_cor=0) + B_scale=1, X_cor=0, X_scale=1, V_cor=0, + seed=123) ###Split the data in training and test sets Ytrain <- out$Y[-c(1:200), ] diff --git a/man/mr.mash.rss.Rd b/man/mr.mash.rss.Rd index 496139a..231cfe8 100644 --- a/man/mr.mash.rss.Rd +++ b/man/mr.mash.rss.Rd @@ -179,7 +179,8 @@ r <- 5 ###Simulate data out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, - B_scale=1, X_cor=0, X_scale=1, V_cor=0) + B_scale=1, X_cor=0, X_scale=1, V_cor=0, + seed=123) ###Split the data in training and test sets Ytrain <- out$Y[-c(1:200), ] From 614497d2b65399a5546e9922bc8bd71b4191e807 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 27 Apr 2023 13:33:44 -0400 Subject: [PATCH 083/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index ce74226..1a5def7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-13 -Date: 2023-04-17 +Version: 0.3-14 +Date: 2023-04-27 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. 
From 9c0445b183ed3a0b7a6b254e6ed55195d84ef176 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Fri, 27 Oct 2023 13:00:09 -0400 Subject: [PATCH 084/103] Use updated flashier interface --- DESCRIPTION | 2 +- R/misc.R | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 1a5def7..14ffbe3 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,7 @@ Imports: matrixStats, mashr (>= 0.2.41), ebnm, - flashier, + flashier (>= 1.0.7), parallel, Rfast Suggests: diff --git a/R/misc.R b/R/misc.R index 1bb098a..0fb3594 100644 --- a/R/misc.R +++ b/R/misc.R @@ -276,18 +276,18 @@ scale_fast2 <- function(M, scale=TRUE, na.rm=TRUE){ compute_cov_flash <- function(Y, error_cache = NULL){ covar <- diag(ncol(Y)) tryCatch({ - fl <- flash(Y, var.type = 2, - ebnm.fn = c(ebnm_normal,ebnm_normal_scale_mixture), + fl <- flash(Y, var_type = 2, + ebnm_fn = c(ebnm_normal,ebnm_normal_scale_mixture), backfit = TRUE,verbose = 0) - if (fl$n.factors == 0) { - covar <- diag(fl$residuals.sd^2) + if (fl$n_factors == 0) { + covar <- diag(fl$residuals_sd^2) } else { - fsd <- sapply(fl$L.ghat,"[[","sd") - covar <- diag(fl$residuals.sd^2) + crossprod(t(fl$F.pm) * fsd) + fsd <- sapply(fl$L_ghat,"[[","sd") + covar <- diag(fl$residuals_sd^2) + crossprod(t(fl$F_pm) * fsd) } if (nrow(covar) == 0) { - covar <- diag(ncol(Y)) - stop("Computed covariance matrix has zero rows") + covar <- diag(ncol(Y)) + stop("Computed covariance matrix has zero rows") } }, error = function(e) { if (!is.null(error_cache)) { From e89af0420d77406f675b1b6e03e8cbe955484ca7 Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Fri, 27 Oct 2023 13:03:45 -0400 Subject: [PATCH 085/103] Bump up version --- DESCRIPTION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 14ffbe3..fa559be 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-14 -Date: 2023-04-27 +Version: 0.3-15 +Date: 2023-10-27 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. From 77b5ac5f6913c3d64c1768eb62f818a5f7079042 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 15 Nov 2023 09:22:13 -0500 Subject: [PATCH 086/103] Fix bug --- DESCRIPTION | 4 ++-- R/simulate_demo_data.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fa559be..841ba58 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-15 -Date: 2023-10-27 +Version: 0.3-16 +Date: 2023-11-15 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate multiple regression with adaptive shrinkage priors. 
diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 539b34a..6302271 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -138,7 +138,7 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce Gamma <- makePD(Gamma, e) X <- rmvnorm(n=n, mu=rep(0, p), sigma=Gamma, seed) } else { - X <- sapply(seed:(seed+(p-1)), sample_norm, n=n, m=0, s2=sqrt(X_scale)) + X <- sapply(seed:(seed+(p-1)), sample_norm, n=n, m=0, s2=X_scale) } X <- scale_fast2(X, scale=FALSE)$M From 78b3970975642e1c96cf727ff4d5f7aea9d70916 Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 15 Nov 2023 09:50:18 -0500 Subject: [PATCH 087/103] Minor improvement --- DESCRIPTION | 2 +- R/simulate_demo_data.R | 11 +++++------ man/simulate_mr_mash_data.Rd | 7 +------ 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 841ba58..e879a97 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Encoding: UTF-8 Type: Package Package: mr.mash.alpha -Version: 0.3-16 +Version: 0.3-17 Date: 2023-11-15 Title: Multiple Regression with Multivariate Adaptive Shrinkage Description: Provides an implementation of methods for multivariate diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index 6302271..ca7c0bd 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -33,10 +33,6 @@ #' #' @param V_cor scalar indicating the positive correlation [0, 1] between residuals #' -#' @param seed seed for random number generation used by \code{Rfast::rmvnorm}. -#' and \code{Rfast::Rnorm}. However, some computations will also need a general -#' \code{set.seed()} to be reproducible. -#' #' @param e A small number to add to the diagonal elements of the #' covariance matrices to make them positive definite. #' @@ -78,12 +74,12 @@ #' r_causal=list(1:2, 3:4), intercepts=rep(1, 5), #' pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1), #' w=c(0.5, 0.5), X_cor=0.5, X_scale=1, -#' V_cor=0, seed=1) +#' V_cor=0) #' #' simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), intercepts=rep(1, r), pve=0.2, B_cor=1, B_scale=1, w=1, - X_cor=0, X_scale=1, V_cor=0, seed=NULL, e=1e-8){ + X_cor=0, X_scale=1, V_cor=0, e=1e-8){ ##Check that the inputs are correct if(length(intercepts)!=r) stop("intercepts must be of length equal to r.") @@ -101,6 +97,9 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce ##Get number of mixture components K <- length(w) + ##Sample seed + seed <- sample.int(.Machine$integer.max, 1) + ##Simulate true effects from N_r(0, Sigma) or \sum_K w_k N_r(0, Sigma_k) where Sigma and Sigma_k are given ##covariance matrices across traits and w_k is the mixture proportion associated to Sigma_k Sigma <- vector("list", K) diff --git a/man/simulate_mr_mash_data.Rd b/man/simulate_mr_mash_data.Rd index 8ac16f0..357dbd9 100644 --- a/man/simulate_mr_mash_data.Rd +++ b/man/simulate_mr_mash_data.Rd @@ -18,7 +18,6 @@ simulate_mr_mash_data( X_cor = 0, X_scale = 1, V_cor = 0, - seed = NULL, e = 1e-08 ) } @@ -53,10 +52,6 @@ proportions associated to each mixture component.} \item{V_cor}{scalar indicating the positive correlation [0, 1] between residuals} -\item{seed}{seed for random number generation used by \code{Rfast::rmvnorm}. -and \code{Rfast::Rnorm}. 
However, some computations will also need a general -\code{set.seed()} to be reproducible.} - \item{e}{A small number to add to the diagonal elements of the covariance matrices to make them positive definite.} } @@ -96,7 +91,7 @@ dat <- simulate_mr_mash_data(n=50, p=40, p_causal=20, r=5, r_causal=list(1:2, 3:4), intercepts=rep(1, 5), pve=0.2, B_cor=c(0, 1), B_scale=c(0.5, 1), w=c(0.5, 0.5), X_cor=0.5, X_scale=1, - V_cor=0, seed=1) + V_cor=0) } From f59ca6ca848ae4155dfc0dd08c4fe5b3dc2ed59e Mon Sep 17 00:00:00 2001 From: Fabio Morgante Date: Thu, 11 Jan 2024 13:11:23 -0500 Subject: [PATCH 088/103] Add tests for diagonal update of V --- tests/testthat/test_mr.mash_vs_mr.mash.rss.R | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/testthat/test_mr.mash_vs_mr.mash.rss.R b/tests/testthat/test_mr.mash_vs_mr.mash.rss.R index 80a3173..30aa9c1 100644 --- a/tests/testthat/test_mr.mash_vs_mr.mash.rss.R +++ b/tests/testthat/test_mr.mash_vs_mr.mash.rss.R @@ -53,6 +53,12 @@ test_that("mr.mash and mr.mash.rss return the same results", { standardize=FALSE, verbose=FALSE, update_V=TRUE) fit_V$progress <- fit_V$fitted <- fit_V$pve <- fit_V$G <- NULL + fit_V_diag <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, + update_w0_method="EM", compute_ELBO=TRUE, + standardize=FALSE, verbose=FALSE, update_V=TRUE, + update_V_method="diagonal") + fit_V_diag$progress <- fit_V_diag$fitted <- fit_V_diag$pve <- fit_V_diag$G <- NULL + fit_scaled_V <- mr.mash(X, Y, S0mix, w0, V_est, update_w0=TRUE, update_w0_method="EM", compute_ELBO=TRUE, standardize=TRUE, verbose=FALSE, update_V=TRUE) @@ -88,6 +94,12 @@ test_that("mr.mash and mr.mash.rss return the same results", { X_colmeans=X_colMeans, Y_colmeans=Y_colMeans) fit_V_rss$progress <- NULL + fit_V_diag_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, + w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, + standardize=FALSE, verbose=FALSE, update_V=TRUE, + X_colmeans=X_colMeans, Y_colmeans=Y_colMeans, + update_V_method="diagonal") + fit_V_diag_rss$progress <- NULL fit_scaled_V_rss <- mr.mash.rss(Bhat=out$Bhat, Shat=out$Shat, covY=V_est, R=R, n=n, S0=S0mix, w0=w0, V=V_est, update_w0=TRUE, compute_ELBO=TRUE, @@ -110,6 +122,7 @@ test_that("mr.mash and mr.mash.rss return the same results", { expect_equal(unclass(fit), unclass(fit_rss), tolerance=1e-10, scale=1) expect_equal(unclass(fit_scaled), unclass(fit_scaled_rss), tolerance=1e-10, scale=1) expect_equal(unclass(fit_V), unclass(fit_V_rss), tolerance=1e-10, scale=1) + expect_equal(unclass(fit_V_diag), unclass(fit_V_diag_rss), tolerance=1e-10, scale=1) expect_equal(unclass(fit_scaled_V), unclass(fit_scaled_V_rss), tolerance=1e-10, scale=1) expect_equal(unclass(fit_scaled_V_declogBF), unclass(fit_scaled_V_declogBF_rss), tolerance=1e-10, scale=1) }) From 0cd4d61c6f5894900c4caa5acab5313c86144d8d Mon Sep 17 00:00:00 2001 From: fmorgante Date: Wed, 24 Apr 2024 15:21:24 -0400 Subject: [PATCH 089/103] Fix merge conflicts --- DESCRIPTION | 1 + R/mr_mash.R | 3 +- R/mr_mash_rss.R | 3 +- R/simulate_demo_data.R | 2 - docs/articles/index.html | 4 +- docs/articles/mr_mash_intro.html | 42 +++++++++--------- .../figure-html/plot-coefs-1.png | Bin 38567 -> 36946 bytes .../figure-html/plot-pred-test-1.png | Bin 94564 -> 79048 bytes docs/reference/coef.mr.mash.rss.html | 7 ++- docs/reference/mr.mash.rss.html | 39 +++++++++------- docs/reference/predict.mr.mash.rss.html | 7 ++- docs/sitemap.xml | 3 ++ man/mr.mash.Rd | 5 +-- man/mr.mash.rss.Rd | 3 +- 14 files changed, 65 insertions(+), 54 
deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f5fd787..90ca4bf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,3 +37,4 @@ LinkingTo: Rcpp, RcppArmadillo (>= 0.10.4.0.0), RcppParallel +RoxygenNote: 7.2.3 diff --git a/R/mr_mash.R b/R/mr_mash.R index 9a99f6c..7c503e6 100644 --- a/R/mr_mash.R +++ b/R/mr_mash.R @@ -125,8 +125,7 @@ #' #' ###Simulate data #' out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, -#' B_scale=1, X_cor=0, X_scale=1, V_cor=0, -#' seed=123) +#' B_scale=1, X_cor=0, X_scale=1, V_cor=0) #' #' ###Split the data in training and test sets #' Ytrain <- out$Y[-c(1:200), ] diff --git a/R/mr_mash_rss.R b/R/mr_mash_rss.R index 0e9b09c..14e71fa 100644 --- a/R/mr_mash_rss.R +++ b/R/mr_mash_rss.R @@ -140,8 +140,7 @@ #' #' ###Simulate data #' out <- simulate_mr_mash_data(n, p, p_causal, r, pve=0.5, B_cor=1, -#' B_scale=1, X_cor=0, X_scale=1, V_cor=0, -#' seed=123) +#' B_scale=1, X_cor=0, X_scale=1, V_cor=0) #' #' ###Split the data in training and test sets #' Ytrain <- out$Y[-c(1:200), ] diff --git a/R/simulate_demo_data.R b/R/simulate_demo_data.R index ca7c0bd..9e767d5 100644 --- a/R/simulate_demo_data.R +++ b/R/simulate_demo_data.R @@ -91,8 +91,6 @@ simulate_mr_mash_data <- function(n, p, p_causal, r, r_causal=list(1:r), interce stop("Elements of w must sum to 1.") if(length(pve)!=1 & length(pve)!=r) stop("pve must be of length equal to 1 or r.") - if(is.null(seed)) - stop("seed argument must be provided.") ##Get number of mixture components K <- length(w) diff --git a/docs/articles/index.html b/docs/articles/index.html index ad70b11..21a12cd 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -17,7 +17,7 @@ mr.mash.alpha - 0.2-27 + 0.2.32 @@ -58,7 +58,7 @@

    All vignettes

    diff --git a/docs/reference/mr.mash.rss.html b/docs/reference/mr.mash.rss.html index 129b924..270402c 100644 --- a/docs/reference/mr.mash.rss.html +++ b/docs/reference/mr.mash.rss.html @@ -20,7 +20,7 @@ mr.mash.alpha - 0.3-12 + 0.2.32 @@ -28,6 +28,9 @@