Merge branch 'devel'

MarioniLab · May 10, 2024 · f92a2bb · f92a2bb
2 parents 67defb6 + f1bb7f0
commit f92a2bb
Show file tree

Hide file tree

Showing 24 changed files with 171 additions and 2,399 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: miloR
 Type: Package
 Title: Differential neighbourhood abundance testing on a graph
-Version: 1.99.9
+Version: 1.99.16
 Authors@R:
     c(person("Mike", "Morgan", role=c("aut", "cre"), email="[email protected]",
      comment=c(ORCID="0000-0003-0757-0711")),
@@ -15,8 +15,7 @@ URL: https://marionilab.github.io/miloR
 BugReports: https://github.com/MarioniLab/miloR/issues
 biocViews: SingleCell, MultipleComparison, FunctionalGenomics, Software
 LinkingTo: Rcpp,
-           RcppArmadillo,
-           RcppEigen
+           RcppArmadillo
 Depends: R (>= 4.0.0),
          edgeR
 Imports: BiocNeighbors,
@@ -69,7 +68,7 @@ Suggests:
     curl,
     scRNAseq,
     graphics
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 NeedsCompilation: no
 Packaged: 2020-07-31 14:15:28 UTC; morgan02
 Collate: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -99,6 +99,7 @@ importFrom(Matrix,Matrix)
 importFrom(Matrix,colSums)
 importFrom(Matrix,crossprod)
 importFrom(Matrix,diag)
+importFrom(Matrix,kronecker)
 importFrom(Matrix,rowMeans)
 importFrom(Matrix,rowSums)
 importFrom(Matrix,solve)
@@ -156,7 +157,6 @@ importFrom(igraph,as_ids)
 importFrom(igraph,cluster_louvain)
 importFrom(igraph,components)
 importFrom(igraph,count_triangles)
-importFrom(igraph,graph.adjacency)
 importFrom(igraph,graph_from_adjacency_matrix)
 importFrom(igraph,induced_subgraph)
 importFrom(igraph,is_igraph)

diff --git a/R/buildFromAdjacency.R b/R/buildFromAdjacency.R
@@ -70,11 +70,11 @@ buildFromAdjacency <- function(x, k=NULL, is.binary=NULL, ...){
     if(is.square){
         if(!is.binary){
             bin.x <- as(matrix(as.numeric(x > 0), nrow=nrow(x)), "dgCMatrix")
-            nn.graph <- graph_from_adjacency_matrix(bin.x, mode="undirected",
+            nn.graph <- graph_from_adjacency_matrix(bin.x, mode="max",
                                                     weighted=NULL,
                                                     diag=FALSE)
         } else{
-            nn.graph <- graph_from_adjacency_matrix(x, mode="undirected",
+            nn.graph <- graph_from_adjacency_matrix(x, mode="max",
                                                     weighted=NULL,
                                                     diag=FALSE)
         }

diff --git a/R/buildNhoodGraph.R b/R/buildNhoodGraph.R
@@ -21,7 +21,7 @@
 #'
 #' NULL
 #'
-#' @importFrom igraph graph.adjacency set_vertex_attr vertex.attributes
+#' @importFrom igraph graph_from_adjacency_matrix set_vertex_attr vertex.attributes
 #' @export
 #' @rdname buildNhoodGraph
 buildNhoodGraph <- function(x, overlap=1){
@@ -42,7 +42,7 @@ buildNhoodGraph <- function(x, overlap=1){
     nhoodAdjacency(x) <- nh_intersect_mat
 
     ## Make igraph object
-    ig <- graph.adjacency(nh_intersect_mat, mode="undirected", weighted=TRUE)
+    ig <- graph_from_adjacency_matrix(nh_intersect_mat, mode="undirected", weighted=TRUE)
     nhood_sizes <- colSums(nhoods(x))
     ig <- set_vertex_attr(ig, name = 'size', value = nhood_sizes[vertex.attributes(ig)$name])
     ## Add to nhoodGraph slot in milo object

diff --git a/R/miloR.R b/R/miloR.R
@@ -4,7 +4,7 @@
 #' as representative neighbourhoods on a nearest neighbour graph. Hypothesis testing is performed using a
 #' negative bionomial generalized linear model.
 #'
-#' @docType package
+#' @aliases miloR
 #' @importFrom Rcpp evalCpp
 #' @useDynLib miloR
 #' @name miloR

diff --git a/R/testNhoods.R b/R/testNhoods.R
@@ -526,8 +526,8 @@ testNhoods <- function(x, design, design.df, kinship=NULL,
         rownames(res) <- 1:length(fit)
         colnames(res)[6:(6+length(rand.levels)-1)] <- paste(names(rand.levels), "variance")
     } else {
-
-        fit <- glmQLFit(dge, x.model, robust=robust)
+        # need to use legacy=TRUE to maintain original edgeR behaviour
+        fit <- glmQLFit(dge, x.model, robust=robust, legacy=TRUE)
         if(!is.null(model.contrasts)){
             mod.constrast <- makeContrasts(contrasts=model.contrasts, levels=x.model)
             res <- as.data.frame(topTags(glmQLFTest(fit, contrast=mod.constrast),

diff --git a/data/sim_nbglmm.RData b/data/sim_nbglmm.RData
diff --git a/man/miloR.Rd b/man/miloR.Rd
diff --git a/src/Makevars b/src/Makevars
@@ -1,5 +1,5 @@
 #PKG_CXXFLAGS = $(CXX11STD) -Wall -pedantic
 CXX11STD = CXX11
-PKG_CXXFLAGS = $(CFLAGS) $(CXX11STD)
-PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
+PKG_CXXFLAGS = $(CFLAGS) $(CXX11STD) $(SHLIB_OPENMP_CXXFLAGS)
+PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) $(SHLIB_OPENMP_CXXFLAGS)
 ALL_CXXFLAGS = $(R_XTRA_CXXFLAGS) $(PKG_CXXFLAGS) $(CXXPICFLAGS) $(SHLIB_CXXFLAGS) $(CXXFLAGS)
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -2,7 +2,6 @@
 // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
 #include <RcppArmadillo.h>
-#include <RcppEigen.h>
 #include <Rcpp.h>
 
 using namespace Rcpp;

diff --git a/src/computeMatrices.cpp b/src/computeMatrices.cpp
@@ -3,6 +3,7 @@
 #include<Rcpp.h>
 #include "utils.h"
 // [[Rcpp::depends(RcppArmadillo)]]
+// [[Rcpp::plugins(openmp)]]
 using namespace Rcpp;
 
 arma::vec computeYStar(arma::mat X, arma::vec curr_beta, arma::mat Z, arma::mat Dinv, arma::vec curr_u, arma::vec y,
@@ -96,7 +97,7 @@ arma::mat computeVStar(arma::mat Z, arma::mat G, arma::mat W){
 }
 
 
-arma::mat computePREML (arma::mat Vsinv, arma::mat X){
+arma::mat computePREML (const arma::mat& Vsinv, const arma::mat& X){
     int n = Vsinv.n_cols;
 
     arma::mat P(n, n);

diff --git a/src/computeMatrices.h b/src/computeMatrices.h
@@ -13,7 +13,7 @@ arma::mat computeW (double disp, arma::mat Dinv, std::string vardist);
 arma::mat computeWNB(double disp, arma::mat Dinv);
 arma::mat computeWPoisson(arma::mat Dinv);
 arma::mat computeVStar (arma::mat Z, arma::mat G, arma::mat W);
-arma::mat computePREML (arma::mat Vsinv, arma::mat X);
+arma::mat computePREML (const arma::mat& Vsinv, const arma::mat& X);
 arma::mat initialiseG (Rcpp::List rlevels, arma::vec sigmas);
 arma::mat initialiseG_G (Rcpp::List u_indices, arma::vec sigmas, arma::mat Kin);
 arma::mat invGmat (Rcpp::List rlevels, arma::vec sigmas);

diff --git a/src/fitGeneticPLGlmm.cpp b/src/fitGeneticPLGlmm.cpp
@@ -1,8 +1,6 @@
 #include<RcppArmadillo.h>
-#include<RcppEigen.h>
 #include<string>
 // [[Rcpp::depends(RcppArmadillo)]]
-// [[Rcpp::depends(RcppEigen)]]
 #include "paramEst.h"
 #include "computeMatrices.h"
 #include "invertPseudoVar.h"

diff --git a/src/inference.cpp b/src/inference.cpp
@@ -1,8 +1,6 @@
 #include "inference.h"
 #include<RcppArmadillo.h>
-#include<RcppEigen.h>
 // [[Rcpp::depends(RcppArmadillo)]]
-// [[Rcpp::depends(RcppEigen)]]
 // using namespace Rcpp;
 
 // All functions used for inference

diff --git a/src/paramEst.cpp b/src/paramEst.cpp
@@ -2,10 +2,10 @@
 #include "computeMatrices.h"
 #include "utils.h"
 #include<RcppArmadillo.h>
-#include<RcppEigen.h>
 #include<cmath>
+
 // [[Rcpp::depends(RcppArmadillo)]]
-// [[Rcpp::depends(RcppEigen)]]
+// [[Rcpp::plugins(openmp)]]
 // using namespace Rcpp;
 
 // All functions used in parameter estimation
@@ -43,9 +43,11 @@ arma::mat sigmaInfoREML_arma (const Rcpp::List& pvstari, const arma::mat& P){
 
     // this is a symmetric matrix so only need to fill the upper or
     // lower triangle to make it O(n^2/2) rather than O(n^2)
+    #pragma omp parallel for
     for(int i=0; i < c; i++){
         const arma::mat& _ipP = pvstari[i]; // this is P * \d Var/ \dsigma
 
+        #pragma omp parallel for
         for(int j=i; j < c; j++){
             const arma::mat& P_jp = pvstari[j]; // this is P * \d Var/ \dsigma
             arma::mat a_ij(_ipP * P_jp); // this is the biggest bottleneck - it takes >2s!
@@ -247,9 +249,19 @@ arma::vec estHasemanElstonGenetic(const arma::mat& Z, const arma::mat& PREML,
     unsigned int n = ystar.size();
     unsigned int c = u_indices.size(); // number of variance components
     unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
-
+    unsigned int i, j;      // Declare loop variables i and j for OpenMP
+    double _ycovij; // Declare temp_value
     arma::mat Ycovar(n, n);
-    Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
+
+    // direct computation of Ycovar with OpenMP?
+    #pragma omp parallel for private(i, j, _ycovij)
+    for (int i = 0; i < n; i++) {
+        for (int j = i; j < n; j++) {
+            arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
+            double _ycovij = arma::dot(PREML.row(i), _tmpMat);
+            Ycovar(i, j) = _ycovij;
+        }
+    }
 
     // select the upper triangular elements, including the diagonal
     arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
@@ -279,14 +291,21 @@ arma::vec estHasemanElston(const arma::mat& Z, const arma::mat& PREML, const Rcp
     unsigned int n = ystar.size();
     unsigned int c = u_indices.size(); // number of variance components
     unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
+    unsigned int i, j;      // Declare loop variables i and j for OpenMP
+    double _ycovij; // Declare temp_value
 
     // sparsify just the multiplication steps.
-    // arma::mat Ycovar(n, n);
     arma::mat Ycovar(n, n);
-    arma::mat YT(ystar * ystar.t());
 
-    Ycovar = PREML * YT * PREML; // project onto REML P matrix
-    // Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
+    // direct computation of Ycovar with OpenMP?
+    #pragma omp parallel for private(i, j, _ycovij)
+    for (int i = 0; i < n; i++) {
+        for (int j = i; j < n; j++) {
+            arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
+            double _ycovij = arma::dot(PREML.row(i), _tmpMat);
+            Ycovar(i, j) = _ycovij;
+        }
+    }
 
     // select the upper triangular elements, including the diagonal
     arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
@@ -351,14 +370,22 @@ arma::vec estHasemanElstonConstrained(const arma::mat& Z, const arma::mat& PREML
     unsigned int n = ystar.size();
     unsigned int c = u_indices.size(); // number of variance components
     unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
+    unsigned int i, j;      // Declare loop variables i and j for OpenMP
+    double _ycovij; // Declare temp_value
 
     // sparsification doesn't seem to help much
-    // arma::mat Ycovar(n, n);
     arma::mat Ycovar(n, n);
-    arma::mat YT(ystar * ystar.t());
 
-    Ycovar = PREML * YT * PREML; // project onto REML P matrix
-    // Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
+    // direct computation of Ycovar with OpenMP?
+    #pragma omp parallel for private(i, j, _ycovij)
+    for (int i = 0; i < n; i++) {
+        for (int j = i; j < n; j++) {
+            arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
+            double _ycovij = arma::dot(PREML.row(i), _tmpMat);
+            Ycovar(i, j) = _ycovij;
+        }
+    }
+
     // select the upper triangular elements, including the diagonal
     arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
     arma::vec Ybig = Ycovar(upper_indices);
@@ -454,9 +481,19 @@ arma::vec estHasemanElstonConstrainedGenetic(const arma::mat& Z, const arma::mat
     unsigned int n = ystar.size();
     unsigned int c = u_indices.size(); // number of variance components
     unsigned long nsq = n * (n + 1)/2; //size of vectorised components using just upper or lower triangle of covariance matrix
+    unsigned int i, j;      // Declare loop variables i and j for OpenMP
+    double _ycovij; // Declare temp_value
 
     arma::mat Ycovar(n, n);
-    Ycovar = PREML * (ystar * ystar.t()) * PREML; // project onto REML P matrix
+    // direct computation of Ycovar with OpenMP?
+    #pragma omp parallel for private(i, j, _ycovij)
+    for (int i = 0; i < n; i++) {
+        for (int j = i; j < n; j++) {
+            arma::mat _tmpMat = arma::trans(PREML.row(j)) % (ystar * ystar(j));
+            double _ycovij = arma::dot(PREML.row(i), _tmpMat);
+            Ycovar(i, j) = _ycovij;
+        }
+    }
 
     // select the upper triangular elements, including the diagonal
     arma::uvec upper_indices = trimatu_ind(arma::size(Ycovar));
@@ -648,6 +685,8 @@ arma::mat vectoriseZ(const arma::mat& Z, const Rcpp::List& u_indices, const arma
     int c = u_indices.size();
     int n = Z.n_rows;
     unsigned long nsq = n * (n + 1)/2;
+    int j, k, l, a;
+    double temp_value;
 
     Rcpp::List _Zelements(1);
     arma::mat bigI(n, n, arma::fill::eye); // this should be the identity which we vectorise
@@ -665,10 +704,40 @@ arma::mat vectoriseZ(const arma::mat& Z, const Rcpp::List& u_indices, const arma
         unsigned int qmin = arma::min(u_idx);
         unsigned int qmax = arma::max(u_idx);
         _subZ = Z.cols(qmin-1, qmax-1);
+        arma::mat _subZT = _subZ * _subZ.t();
 
         arma::mat _ZZT(n, n);
-        _ZZT = P * (_subZ * _subZ.t()) * P; // REML projection is v.slow
+        arma::mat _pZZT(n, n);
+        // Can we turn this into a for loop and use OpenMP?
+        for (int j = 0; j < n; j++) {
+            // j = rows of P
+            #pragma omp parallel for reduction(+:temp_value)
+            for (int k = 0; k < n; k++) {
+                // k = columns of P
+                temp_value = 0.0;
+                for(int a=0; a < n; a++){
+                    temp_value += P(j, a) * _subZT(a, k);
+                }
+            _pZZT(j, k) = temp_value; // Apply P again for symmetry
+            }
 
+        }
+
+        // another loop?
+        for (int j = 0; j < n; j++) {
+            // j = rows of P
+            #pragma omp parallel for reduction(+:temp_value)
+            for (int k = 0; k < n; k++) {
+                // k = columns of P
+                temp_value = 0.0;
+                for(int a=0; a < n; a++){
+                    temp_value += _pZZT(j, a) * P(a, k);
+                }
+                _ZZT(j, k) = temp_value; // Apply P again for symmetry
+            }
+
+        }
+        // _ZZT = _pZZT * P; // REML projection is v.slow
         // vectorise
         arma::vec _vecZ = _ZZT(upper_indices);
         arma::mat _vecZZT = _Zelements(0);

diff --git a/src/paramEst.h b/src/paramEst.h
@@ -2,9 +2,8 @@
 #define PARAMEST_H
 
 #include<RcppArmadillo.h>
-#include<RcppEigen.h>
 // [[Rcpp::depends(RcppArmadillo)]]
-// [[Rcpp::depends(RcppEigen)]]
+// [[Rcpp::plugins(openmp)]]
 
 arma::vec sigmaScoreREML_arma (const Rcpp::List& pvstar_i, const arma::vec& ystar,
                                const arma::mat& P, const arma::vec& curr_beta,

diff --git a/src/pseudovarPartial.cpp b/src/pseudovarPartial.cpp
@@ -45,12 +45,29 @@ List pseudovarPartial_P(List V_partial, const arma::mat& P){
     // A Rcpp specific implementation that uses positional indexing rather than character indexes
     // don't be tempted to sparsify this - the overhead of casting is too expensive
     unsigned int items = V_partial.size();
+    unsigned int n = P.n_cols;
     List outlist(items);
+    double temp_value;
 
     for(unsigned int i = 0; i < items; i++){
         // Need to output an S4 object - arma::sp_mat uses implicit interconversion for support dg Matrices
         arma::mat _omat = V_partial(i);
-        arma::mat omat(P * _omat);
+        arma::mat omat(n, n);
+
+        // Can we turn this into a for loop and use OpenMP?
+        for (int j = 0; j < n; j++) {
+            // j = rows of P
+            #pragma omp parallel for reduction(+:temp_value)
+            for (int k = 0; k < n; k++) {
+                // k = columns of P
+                temp_value = 0.0;
+                for(int a=0; a < n; a++){
+                    temp_value += P(j, a) * _omat(a, k);
+                }
+                omat(j, k) = temp_value; // Apply P again for symmetry
+            }
+        }
+
         outlist[i] = omat;
     }
 

diff --git a/tests/testthat/test_buildGraph.R b/tests/testthat/test_buildGraph.R
@@ -97,7 +97,7 @@ test_that("buildGraph can take different inputs", {
     expect_true(igraph::identical_graphs(miloR::graph(buildGraph(sim1.pca$x, transposed=TRUE, k=21)), sim1.graph))
 
     # input are expression values
-    graph.diff <- graph.difference(miloR::graph(buildGraph(sim1.gex, transposed=FALSE, k=21)), sim1.graph)
+    graph.diff <- difference(miloR::graph(buildGraph(sim1.gex, transposed=FALSE, k=21)), sim1.graph)
 
     expect_true(length(E(graph.diff)) == 0)
     expect_true(length(V(graph.diff)) == length(V(sim1.graph)))

diff --git a/tests/testthat/test_buildNhoodGraph.R b/tests/testthat/test_buildNhoodGraph.R
@@ -55,7 +55,7 @@ test_that("Code produced identical output", {
     rownames(nh_intersect_mat) <- colnames(nhoods)
     colnames(nh_intersect_mat) <- colnames(nhoods)
 
-    ig <- graph.adjacency(nh_intersect_mat, mode="undirected", weighted=TRUE)
+    ig <- graph_from_adjacency_matrix(nh_intersect_mat, mode="undirected", weighted=TRUE)
     nhood_sizes <- sapply(nhoods(traj_milo), length)
     ig <- set_vertex_attr(ig, name = 'size', value = nhood_sizes[vertex.attributes(ig)$name])