From fe965d4a3fa26cc9c4b042455a2dc9686ccc2545 Mon Sep 17 00:00:00 2001
From: Jack Leary <j.leary@ufl.edu>
Date: Thu, 30 Nov 2023 13:03:19 -0500
Subject: [PATCH] updated README & minor docs changes

---
 .DS_Store                | Bin 10244 -> 16388 bytes
 R/plotModelCoefs.R       |   2 +-
 R/testDynamic.R          |   4 ++--
 README.Rmd               |  38 ++++++++++----------------------------
 inst/.DS_Store           | Bin 6148 -> 6148 bytes
 inst/rmarkdown/.DS_Store | Bin 6148 -> 6148 bytes
 man/testDynamic.Rd       |   2 +-
 7 files changed, 14 insertions(+), 32 deletions(-)
diff --git a/.DS_Store b/.DS_Store
index 412ed02b166f258bab4560771829b84b56019b65..64e4768f65221a3de2f1ecd35892ca1107c9644b 100644
GIT binary patch
literal 16388
zcmeHOYitzP6~1S8fmx>DF~tzCgLkP#3S=Q(8)Bn8SU*THnA-8$*g%54kHJitomqCq
zHk6V&Bx$P<MfpXOC{g;S4;3}-kA^l?lBQBqwQ1Ez5kett1Zq_&k^2162dz}~+_|&6
zcV=fXZLOL*bETO(bI&>V@tt$#+&goJ5CYv(xdb7dgb)D_noA$SV!VlA-YQ~&ohY3i
zRVj!ifX4(LA(SU1OLS5o3XwB2Sa!3g0Ctcb*mtsw43QY$f1{95^$b#8&J<L|Fk4$M
zK_D1fvUHiSOb~?!Q%Cigl$kcCjP7*t5WO4Lbv0AEPG|Nj^31rjx<}W{w4%vHOF+rV
z$nwCbqUH3NZhcD28J28{5rnW19+w(Vooe5{Js#V#J#ji7JC$fpB;v7-ZHd#T!$L#L
z9d`|lA5-(1eil2238)HaTEVjN`3byQf=7@Tq)4*UsiLOp+9Z}h_{EiFHvhs$z(>GG
zz(?T2hJYP71TNNp*dXx7>m%SJPy+$_e2C#e^Q4{6vfO%f;E7J3DV<KCt%TRO4zS8%
zcAm8JS(aO(C}W9Y&P2gpVgM7z@)%{uJZa~%EN9{bm^cBuWx8;o39QyJkKqj_NLrbH
z@)7V6@Ias$%ff7dh#tqS-xqfM+qUD*c&xpn&07B^Iy>XB?VWA^!TQ%@XzFAlmDTeF
z28QsaHvoCPx7U6FAbj(kqV)O4-{`8vnY%&G!co5ELD~V2{_*Q{<XA#XBBO##Vq|6(
zhf=s9IhE1>m~+6q64qVYaj8lEAyon=pVuqkdviEJpFnY5-UYT;0edleLyh5iD4_-?
zuaHOnQ1*(N+!f`_&F!J68uhxA7ADj9%5jot=qDyq;9~I<kD)o0mDq#!?7_t=>deXu
z&%Xbd{rIYaiua)Fb2=4dKfbE!b1A2;S~-i+v*sA4htlSd;1zgjl~Ia6b4pb_?#87&
z6gx*dE+x~4s-JuWd<1+1d<1+1d<6b)5ZH!24Uw%leO8FcPgCGkwVS8$6Ot3Nlb?Yv
zSToA9EI*I*qVtms`16-2Kp~PNX`Ha9`F2Uu`PX?G-|qcKp4xrrvv#|-EZ?pdoo|;k
z(W=SvQoH<2)T0lce0+c2`^Be96SEq;mFd`AydM1wVn20~<$DGVm(Sn`Yzp6(&}8xH
zv4Vw7nObSRn##8a4Ob~HA+!OFzG_(L<<oraekc!!hmhR<+FSf!c@Up$j5dCE);2I3
z4Bo)J=sb`lE;)d7&6Agx0Ok#Ev4HJ4I1k`G`FJJt>^xTSDq9IWV7x-+;`LCDXZtQi
z1xX&COxi-LX1?Bn+QV@uWA)_;G*z3z!@u*D7tYxa*)p~AAgQK0AYSt=YX7#J3)C<J
zl~?6r0oY?`9$<TL@xUK{uKYsHle40P>vMW2#G^i!azbRE_4>>iiwU`k>MYav-Y!o+
zT8cLuz0yZ7x}#^M?}S4OZ3OcbZGa3plpgHcg5c3oW4ecZO9*Z(Z6<uTw3#rxTohMC
zM)r@4jvW}6q$xullGU`SOv}A#Q#Qt>Is^9>_GNW#uqaRCp2E?ru1@7Oq||5AYIX{l
z<t|lCDW7KC*`lthgSw%Zimn|PGvyN|rO}EUq_KmBUNn2G%ElC|pK-0CZ44Bsu(4k*
z8kmQ1yWrYe*KgQ-$F?2!_I#p$@M4`*zv`o_Me!hRThvVDh$0t9W|W+H^g!Ck%34m*
zCP$|gL&>P}u$Iot^?;RFQnxflB4bk-<@l6ho|IYy!75$X91$PL;XwB3Ia*szdP!jX
zA8U?84pB-^QO0u)1y<Edbz)@Wrm!HThYcJWD-pMB4TmLT_!#c3v`QjkTe}dJawBRw
zBdc~%M<*8T9i3()vkULOM+nzLd<;Je<#9ZQ)u)}wNaQeWxj$`~0|i;bUj5i^J`!oZ
zPY@1JYF92NV|!A4U8FCvSCHNo4jM<(MR{Nb_k5Ph2L!=VB$m_KcI6aPFB4OgC@I>s
zg4;<EZnmpbXhGw#QcH3pyKo@Xl5AzA68&_(j<k|)vWFbT?YAe%S#plNK)y|0A{WRT
z<mcovxk5f5e<FV&*T~<=KOh7vpbjLchX{NeHo<1N9kxLdx}Y2OLO<+-0T_k{;V_It
z8jgSt1vm}{oP;y*2s{d(gD2sO@H9LF&%u}BMR*my53j=y;7#}`{0uI^6}Sq&f?vb$
z;ji#FxE2rts{;*zHGxfm&4Kp7-GRQq5bJvYSWXVwxt#@u@q&4dy$v_;ytn5>clYhy
zv$x}>^_<HqnXfdgSsSAc)Ok;L+02+>mTZx?a?7k8>bA_<@+Fr|UPO;-x>anBj-cJk
zCn^re9J1tw(lTn{g2laFY>7%s&@<TWZQ{nLv=qHUxY!hFyd@fsZWYk~q=3cT8f_D$
zAi7%EqKffo2Ra^Dv>oDIq7*`>vuQiUE^I{$ON7<b8@*3JpOJ(cHs<S^@=wUy<Q<Gi
zzeQ92Cj>!&wGe~bpcU?fdoA<rg<bGT7=-&FWf|}|Oh5**phDg<;v$@a)9?_?!o%=5
zoP{SWLw*LHMMHibz5-u`ufuux23&yez{~JGcm+-SNAMQ>lG$b1T>G8o3cq3I%E!n4
z_kZ`^YY$mV2{dGW&g!H%Vq)kiS~2pwqO-HP`Mkru{_ef+9zo2tJKhgyen9gB+P@vp
zs=de$Zq{ti4{jI8%^loarppJqjw_R;*ELmKmE~Y<E{_1{s>c-!?f;n;df7OCFK4XZ
zvm8-(!#TRI!yYIP_SfD$|ADyPe#>jg@iU8Rssr2&f8_qq!}^!9i9{#e|G%sD|9_0j
zKl=#y2rNnjAk>@cr62oBt8w=z>0bL79us)bMEPdv1tz?vWSU!B@iBM&ER6R_n?B2O
zEWz|Y{{g^X-2NAfTUSKpLsIR?&WEPjT#gXM7Qn97{P<tqU|t>Jd628A4sadu|6gTb
B{yzW!

delta 219
zcmZo^U~CDHU|?WibSh0TWMEJLGC6=4L<{gtEEJolCpp=`hD8*}XAlBnMxZ!DPP$=m
za(-^X=0J(%?3>v+SU4DkCi5uNvS%{nF%&bDOnxZRFnNJe;N(nY{>h4}*CwZ{@iQ|r
z1VP0;K<v#92IkD0m+5R`n!MiZ>EyYBZzppJ^G`k}@@{gB82{z~30W3FW{|Bw$8ZA)
tSCE4?3o?CYp3HA!IoX~^LVyY4Rt=CEpt}qvr`Sx~EN2wS2sCyTBLFn_Gfw~j

diff --git a/R/plotModelCoefs.R b/R/plotModelCoefs.R
index aeb8e36..57a311e 100644
--- a/R/plotModelCoefs.R
+++ b/R/plotModelCoefs.R
@@ -120,7 +120,7 @@ plotModelCoefs <- function(test.dyn.res = NULL,
   }
   # convert coefficient summary to grob
   coef_sumy_grob <- gridExtra::tableGrob(coef_sumy,
-                                         cols = c("Interval", "Coefficient"),
+                                         cols = c("Interval", "Slope"),
                                          theme = gridExtra::ttheme_minimal(base_size = 11,
                                                                            core = list(fg_params = list(hjust = 0, x = 0.05)),
                                                                            colhead = list(fg_params = list(hjust = 0, x = 0.05))),
diff --git a/R/testDynamic.R b/R/testDynamic.R
index 67c1d3d..3792612 100644
--- a/R/testDynamic.R
+++ b/R/testDynamic.R
@@ -2,7 +2,7 @@
 #'
 #' @name testDynamic
 #' @author Jack Leary
-#' @description This function tests whether a NB \code{marge} model is better than a null (intercept-only) NB GLM using the Likelihood Ratio Test. In effect, the test tells us whether a gene's expression changes (in any way) over pseudotime.
+#' @description This function tests whether a NB \code{marge} model is better than a null (intercept-only) model using the Likelihood Ratio Test. In effect, the test tells us whether a gene's expression changes (in any way) over pseudotime.
 #' @import glm2
 #' @import magrittr
 #' @importFrom Matrix t
@@ -381,7 +381,7 @@ testDynamic <- function(expr.mat = NULL,
   total_time <- end_time - start_time
   total_time_units <- attributes(total_time)$units
   total_time_numeric <- as.numeric(total_time)
-  time_message <- paste0("\nscLANE testing completed for ",
+  time_message <- paste0("scLANE testing completed for ",
                          length(genes),
                          " genes across ",
                          n_lineages,
diff --git a/README.Rmd b/README.Rmd
index b5d2ce7..7ffe868 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -17,7 +17,9 @@ knitr::opts_chunk$set(warning = FALSE,
                       comment = "#>",
                       fig.path = "man/figures/README-",
                       out.width = "100%", 
-                      fig.retina = TRUE)
+                      fig.retina = TRUE, 
+                      fig.width = 5, 
+                      fig.height = 3)
 ```
 
 # `scLANE`
@@ -43,18 +45,16 @@ remotes::install_github("jr-leary7/scLANE")
 
 ## Model structure
 
-The `scLANE` package enables users to accurately determine differential expression of genes over pseudotime or latent time, and to characterize gene's dynamics using interpretable model coefficients. `scLANE` builds upon the [`marge` modeling framework](https://github.com/JakubStats/marge), allowing users to characterize their trajectory's effects on mRNA expression using negative binomial [GLMs](https://en.wikipedia.org/wiki/Generalized_linear_model), [GEEs](https://en.wikipedia.org/wiki/Generalized_estimating_equation), or [GLMMs](https://en.wikipedia.org/wiki/Generalized_linear_mixed_model), depending on the experimental design & biological questions of interest. This modeling framework is an extension of the well-known [Multivariate Adapative Regression Splines (MARS)](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline) method, which uses truncated power basis splines to build nonlinear models using a [generalized cross validation (GCV) criterion](https://doi.org/10.48550/arXiv.1706.02495).
+The `scLANE` package enables users to accurately determine differential expression of genes over pseudotime or latent time, and to characterize gene's dynamics using interpretable model coefficients. `scLANE` builds upon the `marge` modeling framework([GitHub](https://github.com/JakubStats/marge), [paper](https://doi.org/10.1080/10618600.2017.1360780)), allowing users to characterize their trajectory's effects on gene expression using negative binomial GLMs, GEEs, or GLMMs depending on the experimental design & biological questions of interest. This modeling framework is an extension of the [Multivariate Adapative Regression Splines (MARS)](https://en.wikipedia.org/wiki/Multivariate_adaptive_regression_spline) method, which builds nonlinear models out of piecewise linear components. `scLANE` is agnostic with respect to the ordering estimation method used, and can be implemented downstream of any pseudotime or RNA velocity method. 
 
 A quickstart guide on how to use `scLANE` with simulated data continues below, and a more detailed vignette showcasing its performance on real data can be found [here](https://jr-leary7.github.io/quarto-site/tutorials/scLANE_Trajectory_DE.html). 
 
 # Usage
 
-Our method relies on a relatively simple test in order to define whether a given gene is differentially expressed (or "dynamic") over the provided trajectory. While the exact structure of the test differs by model backend, the concept is the same: the spline-based NB GLM / GEE / GLMM is treated as the alternate model, and a null model is fit using the corresponding model backend. If the GLM backend is used, then the null model is simply an intercept-only NB GLM; the GEE backend fits an intercept-only model with the same working correlation structure as the alternate model, and if the GLMM backend is used then the null model is an intercept-only model with random intercepts for each subject. The alternate hypothesis is thus that at least one of the estimated coefficients is significantly different from zero. We predict a given gene to be dynamic if the adjusted *p*-value of the test is less than an *a priori* threshold; the default threshold is $\alpha = 0.01$, and the default adjustment method is [the Holm correction](https://en.wikipedia.org/wiki/Holm–Bonferroni_method). 
+Our method relies on a relatively simple test in order to define whether a given gene is differentially expressed (or "dynamic") over the provided trajectory. While the exact structure of the test differs by model mode, the concept is the same: the spline-based NB GLM / GEE / GLMM is treated as the alternate model, and a null model is fit using the corresponding model mode. If the GLM mode is used, then the null model is simply an intercept-only NB GLM; the GEE mode fits an intercept-only model with the same working correlation structure as the alternate model, and if the GLMM mode is used then the null model is an intercept-only model with random intercepts for each subject. The alternate hypothesis is that at least one of the estimated coefficients is significantly different from zero. We predict a given gene to be dynamic if the adjusted *p*-value of the test is less than the default $\alpha = 0.01$ threshold, and classify it as static otherwise. 
 
 ## Libraries 
 
-First we'll need to load a couple packages. 
-
 ```{r libraries, results='hide', message=FALSE}
 library(dplyr)
 library(scLANE)
@@ -119,7 +119,7 @@ scLANE_models_glm <- testDynamic(sim_data,
                                  verbose = FALSE)
 ```
 
-After the function finishes running, we use `getResultsDE()` to generate a sorted table of DE test results, with one row for each gene & lineage. The GLM mode uses a simple likelihood ratio test to compare the null & alternate models, with the test statistic assumed to be [asymptotically Chi-squared distributed](https://en.wikipedia.org/wiki/Likelihood-ratio_test).  
+After the function finishes running, we use `getResultsDE()` to generate a sorted table of DE test results, with one row for each gene & lineage. The GLM mode uses a simple likelihood ratio test to compare the null & alternate models, with the test statistic assumed to be asymptotically Chi-squared distributed.  
 
 ```{r glm-results}
 scLANE_res_glm <- getResultsDE(scLANE_models_glm)
@@ -132,7 +132,7 @@ select(scLANE_res_glm, Gene, Lineage, Test_Stat, P_Val, P_Val_Adj, Gene_Dynamic_
 
 ### GEE mode
 
-The function call is essentially the same when using the GLM mode, with the exception of needing to provide a sorted vector of subject IDs & a desired correlation structure, the default being [the AR1 structure](https://en.wikipedia.org/wiki/Autoregressive_model). We also need to flip the `is.gee` flag in order to indicate that we'd like to fit estimating equations models (instead of mixed models). Since fitting GEEs is  more computationally complex than fitting GLMs, DE testing with the GEE mode takes a bit longer. Using more cores and / or running the tests on an HPC cluster speeds things up considerably. 
+The function call is essentially the same when using the GLM mode, with the exception of needing to provide a sorted vector of subject IDs & a desired correlation structure. We also need to flip the `is.gee` flag in order to indicate that we'd like to fit estimating equations models (instead of mixed models). Since fitting GEEs is  more computationally complex than fitting GLMs, DE testing with the GEE mode takes a bit longer. Using more cores and / or running the tests on an HPC cluster speeds things up considerably. 
 
 ```{r gee-models}
 scLANE_models_gee <- testDynamic(sim_data, 
@@ -146,7 +146,7 @@ scLANE_models_gee <- testDynamic(sim_data,
                                  verbose = FALSE)
 ```
 
-We again generate the table of DE test results. The variance of the estimated coefficients is determined using [the sandwich estimator](https://online.stat.psu.edu/stat504/lesson/12/12.3), and a Wald test is used to compare the null & alternate models. 
+We again generate the table of DE test results. The variance of the estimated coefficients is determined using the sandwich estimator, and a Wald test is used to compare the null & alternate models. 
 
 ```{r gee-results}
 scLANE_res_gee <- getResultsDE(scLANE_models_gee)
@@ -176,7 +176,7 @@ scLANE_models_glmm <- testDynamic(sim_data,
 
 **Note:** The GLMM mode is still under development, as we are working on further reducing runtime and increasing the odds of the underlying optimization process converging successfully. As such, updates will be frequent and functionality / results may shift slightly. 
 
-Like the GLM mode, the GLMM mode uses a likelihood ratio test to compare the null & alternate models. We fit the two nested models using maximum likelihood estimation instead of [REML](https://en.wikipedia.org/wiki/Restricted_maximum_likelihood) in order to perform this test; the null model in this case is a negative binomial GLMM with a random intercept for each subject.  
+Like the GLM mode, the GLMM mode uses a likelihood ratio test to compare the null & alternate models.   
 
 ```{r glmm-results}
 scLANE_res_glmm <- getResultsDE(scLANE_models_glmm)
@@ -191,7 +191,7 @@ select(scLANE_res_glmm, Gene, Lineage, Test_Stat, P_Val, P_Val_Adj, Gene_Dynamic
 
 ### Model comparison
 
-We can use the `plotModels()` to visually compare different types of models. It takes as input the results from `testDynamic()`, as well as a few specifications for which models & lineages should be plotted. While more complex visualizations can be created from our model output, this function gives us a good first glance at which models fit the underlying trend the best. Here we show the output generated using the GLM mode, split by model type. The intercept-only model shows the null hypothesis against which the scLANE model is compared using the likelihood ratio test and the GLM displays the inadequacy of monotonic modeling architectures for nonlinear dynamics. A GAM shows essentially the same trend as the `scLANE` model, though the fitted trend from `scLANE` is more interpretable & has a narrower confidence interval.   
+We can use the `plotModels()` to visually compare different types of models. It takes as input the results from `testDynamic()`, as well as a few specifications for which models & lineages should be plotted. While more complex visualizations can be created from our model output, this function gives us a good first glance at which models fit the underlying trend the best. Here we show the output generated using the GLM mode, split by model type. The intercept-only model shows the null hypothesis against which the scLANE model is compared using the likelihood ratio test and the GLM displays the inadequacy of monotonic modeling architectures for nonlinear dynamics. A GAM shows essentially the same trend as the `scLANE` model, though the fitted trend from `scLANE` is more interpretable.
 
 ```{r plot-models-glm}
 plotModels(scLANE_models_glm, 
@@ -205,22 +205,6 @@ plotModels(scLANE_models_glm,
            plot.scLANE = TRUE)
 ```
 
-Model comparison using the GEE mode is similar, with the only change being that we now provide a vector of subject IDs.
-
-```{r plot-models-gee}
-plotModels(scLANE_models_gee, 
-           gene = scLANE_res_gee$Gene[1], 
-           is.gee = TRUE, 
-           id.vec = sim_data$subject, 
-           pt = order_df, 
-           expr.mat = sim_data, 
-           size.factor.offset = cell_offset, 
-           plot.null = TRUE, 
-           plot.glm = TRUE, 
-           plot.gam = TRUE, 
-           plot.scLANE = TRUE)
-```
-
 When plotting the models generated using the GLMM mode, we split by lineage & color the points by subject ID instead of by lineage. The gene in question highlights the utility of the scLANE model, since the gene dynamics differ significantly by subject. 
 
 ```{r plot-models-glmm, fig.width=9, fig.height=9}
@@ -231,8 +215,6 @@ plotModels(scLANE_models_glmm,
            size.factor.offset = cell_offset, 
            id.vec = sim_data$subject, 
            is.glmm = TRUE, 
-           plot.null = FALSE, 
-           plot.glm = TRUE, 
            plot.gam = TRUE, 
            plot.scLANE = TRUE)
 ```
diff --git a/inst/.DS_Store b/inst/.DS_Store
index f7fcef9fd29c7d49b680444a9d497b6bd130df7a..366ba3e45417a09d735a9326db360dbc37f0ec03 100644
GIT binary patch
delta 345
zcmZoMXfc=|#>B!ku~2NHo+2a9#(>?7ixpUy7<nf1Fnz4gNjD5m&d)7i00Zw0K#G&0
zh#{9D5lCh;q%h<&lr!Yz=DWBg<>V&;WjXY(YYM3q9d*Q|GKEN$1sOn_x#3nfFl>I#
z#LtN8AnnKqkd+K245<voKr#jBtP+MqWals(&~Nl?1Um<dI)r2JsRKI&;u2N{2JHlf
n$&Z-dZf57;=Ku!WW<iec%#-;=961;n7?^<0Vb~lYvW6J|S!PgG

delta 74
zcmZoMXfc=|#>B)qu~2NHo+2a5#(>?7j4YEAS;ZzRvD}_Km-XjlJ@%U$8!VVNvvcrs
d099@l<oM1!nP0?_g8>K_85meLM~JLp1^^S}6E^?=

diff --git a/inst/rmarkdown/.DS_Store b/inst/rmarkdown/.DS_Store
index 271a8005d45ebbf8b9dea66b53656fca55c7497b..72fada99b0e556b7cd70b084e3dc25c8c3c59d68 100644
GIT binary patch
delta 168
zcmZoMXfc@J&&akhU^gQp+hiW5kM%j}hQZ1CxdjYhV7vxMaWa%Jq%!0(6fop4B!bz+
xx%n<GNjdpRKv@ok-<m>dMMoWRsZ1eKWkCkeW^TCE2@IQ`Gc~YmX6N|J4*;J!DAND{

delta 37
tcmZoMXfc@J&&aefU^nAr0}+<VDa^u?m6&dCp39uYGO=O7W_FIh`~c{f48i~a

diff --git a/man/testDynamic.Rd b/man/testDynamic.Rd
index 5299865..e2b21a4 100644
--- a/man/testDynamic.Rd
+++ b/man/testDynamic.Rd
@@ -57,7 +57,7 @@ testDynamic(
 A list of lists, where each element is a gene and each gene contains sublists for each element. Each gene-lineage sublist contains a gene name, lineage number, default \code{marge} vs. null model test results, model statistics, and fitted values. Use \code{\link{getResultsDE}} to tidy the results.
 }
 \description{
-This function tests whether a NB \code{marge} model is better than a null (intercept-only) NB GLM using the Likelihood Ratio Test. In effect, the test tells us whether a gene's expression changes (in any way) over pseudotime.
+This function tests whether a NB \code{marge} model is better than a null (intercept-only) model using the Likelihood Ratio Test. In effect, the test tells us whether a gene's expression changes (in any way) over pseudotime.
 }
 \details{
 \itemize{