Backup: documentation m/l revised, no bugs in tensorflow, loading dat…

…a not changed
diegommcc · Nov 13, 2023 · 192a5b3 · 192a5b3
1 parent 9a11d58
commit 192a5b3
Show file tree

Hide file tree

Showing 19 changed files with 399 additions and 260 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -53,7 +53,7 @@ Suggests:
     bluster,
     lsa
 SystemRequirements: Python (>= 2.7.0), TensorFlow (https://www.tensorflow.org/)
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.3
 Roxygen: list(markdown = TRUE)
 Collate: 
     'AllClasses.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -50,7 +50,7 @@ export(method)
 export(mixed.profiles)
 export(model)
 export(plotDistances)
-export(plotHeatmapGrads)
+export(plotHeatmapGradsAgg)
 export(plotSpatialClustering)
 export(plotSpatialGeneExpr)
 export(plotSpatialProp)

diff --git a/R/AllClasses.R b/R/AllClasses.R
@@ -8,7 +8,7 @@ NULL
 setOldClass(Classes = 'package_version')
 # TODO: error with setOldClass, check what is going on
 setClass("keras_training_history") 
-setOldClass(Classes = "keras.engine.sequential.Sequential")
+setClass("keras.engine.sequential.Sequential")
 
 setClassUnion(name = "MatrixOrNULL", members = c("matrix", "NULL"))
 setClassUnion(name = "ListOrNULL", members = c("list", "NULL"))

diff --git a/R/dnnModel.R b/R/dnnModel.R
@@ -9,9 +9,11 @@ NULL
 #' Train deconvolution model for spatial transcriptomics data
 #'
 #' Train a deep neural network model using training data from the
-#' \code{\linkS4class{SpatialDDLS}} object. In addition, the trained model is
-#' evaluated using test data, and prediction results are obtained to determine
-#' its performance (see \code{?\link{calculateEvalMetrics}}).
+#' \code{\linkS4class{SpatialDDLS}} object. This model will be used to 
+#' deconvolute spatial transcriptomics data from the same biological context as 
+#' the single-cell RNA-seq data used to train it. In addition, the trained 
+#' model is evaluated using test data, and prediction results are obtained to 
+#' determine its performance (see \code{?\link{calculateEvalMetrics}}).
 #'
 #' \strong{Simulation of mixed transcriptional profiles 'on the fly'}
 #'
@@ -35,10 +37,10 @@ NULL
 #'   \code{single.cell.real}/\code{single.cell.simul}, \code{prob.cell.types},
 #'   and \code{mixed.profiles} slots (the last only if \code{on.the.fly =
 #'   FALSE}).
-#' @param type.data.train Type of profiles used for training. It can be
+#' @param type.data.train Type of profiles to be used for training. It can be
 #'   \code{'both'}, \code{'single-cell'} or \code{'mixed'} (\code{'mixed'} by
 #'   default).
-#' @param type.data.test Type of profiles used for evaluation. It can be
+#' @param type.data.test Type of profiles to be used for evaluation. It can be
 #'   \code{'both'}, \code{'single-cell'} or \code{'mixed'} (\code{'mixed'} by
 #'   default).
 #' @param batch.size Number of samples per gradient update (64 by default).
@@ -73,7 +75,7 @@ NULL
 #' @param scaling How to scale data before training. It can be:
 #'   \code{"standardize"} (values are centered around the mean with a unit
 #'   standard deviation), \code{"rescale"} (values are shifted and rescaled so
-#'   that they end up ranging between 0 and 1, by default) or \code{"none"} (no
+#'   that they end up ranging between 0 and 1) or \code{"none"} (no
 #'   scaling is performed). \code{"standardize"} by default. 
 #' @param norm.batch.layers Whether to include batch normalization layers
 #'   between each hidden dense layer (\code{TRUE} by default).
@@ -87,16 +89,17 @@ NULL
 #'   by default).
 #' @param sc.downsampling It is only used if \code{type.data.train} is equal to
 #'   \code{'both'} or \code{'single-cell'}. It allows to set a maximum number of
-#'   single-cell profiles of a specific cell type used for training to avoid
-#'   an unbalanced representation of cell types (\code{NULL} by default).
+#'   single-cell profiles of a specific cell type for training to avoid
+#'   an unbalanced representation of classes (\code{NULL} by default).
 #' @param use.generator Boolean indicating whether to use generators during
 #'   training and test. Generators are automatically used when \code{on.the.fly
 #'   = TRUE} or HDF5 files are used, but it can be activated by the user on
 #'   demand (\code{FALSE} by default).
 #' @param on.the.fly Boolean indicating whether simulated data will be generated
 #'   'on the fly' during training (\code{FALSE} by default).
-#' @param agg.function Function used to build mixed transcriptional profiles. It
-#'   may be: \itemize{ \item \code{"AddRawCount"} (by default): single-cell
+#' @param agg.function If \code{on.the.fly == TRUE}, function used to build 
+#'   mixed transcriptional profiles. It may be: 
+#'   \itemize{ \item \code{"AddRawCount"} (by default): single-cell
 #'   profiles (raw counts) are added up across cells. Then, log-CPMs are
 #'   calculated. \item \code{"MeanCPM"}: single-cell profiles (raw counts) are
 #'   transformed into logCPM and cross-cell averages are calculated. 
@@ -1084,30 +1087,49 @@ trainDeconvModel <- function(
 ############### Deconvolution of spatial transcriptomics data ##################
 ################################################################################
 
-#' Deconvolute spatial transcriptomics data
+#' Deconvolute spatial transcriptomics data using trained model
 #'
-#' Deconvolute spatial transcriptomics data using the trained model contained in
-#' the \code{\linkS4class{SpatialDDLS}} object. After model prediction, 
-#' proportions of every spot are regularized according to its spatial location 
-#' in order to keep their spatial consistency. In particular, nearest spots for 
-#' every spot are calculated, and their profiles are added up so that a counter 
-#' transcriptional profile for each spot composed of its nearest spots is 
-#' created. We refer to these profiles as 'extrinsic' profiles, whereas the
-#' original profiles are called 'intrinsic'. Extrinsic profiles are used as 
-#' input for the trained deconvolution model, thus having two matrices of 
-#' predicted cell proportions: the intrinsic based on the intrinsic 
-#' transcriptome of each spot and the extrinsic based on their nearby spots. 
-#' Then, distances between intrinsic and extrinsic transcriptional profiles are
-#' calculated and used create a new set of regularized predicted proportions 
-#' under the following hypothesis: if extrinsic profiles are very dissimilar to 
-#' their intrinsic counterparts, intrinsic proportions will be used. On the 
-#' other hand, if extrinsic profiles are similar to their intrinsic 
-#' counterparts, a weighted mean between extrinsic and intrinsic proportions 
-#' is calculated. Weights are calculated according to transcriptome distances 
-#' between intrinsic nd extrinsic transcriptional profiles (see \code{metrics} 
-#' parameter). In this way, we leverage the spatial information contained in 
-#' spatial transcriptomics data while use the transcriptional information to 
-#' deconvolute the data. For details, see Mañanes et al., 2023.
+#' Deconvolute spatial transcriptomics data using the trained model in
+#' the \code{\linkS4class{SpatialDDLS}} object. The trained model is used 
+#' to predict cell proportions of two mirrored transcriptional profiles: 
+#' \itemize{ \item 'Intrinsic' profiles: transcriptional profiles of each spot 
+#' in the ST dataset. \item 'Extrinsic' profiles: profiles simulated from the 
+#' surrounding spots of each spot.} After prediction, cell proportions
+#' from the intrinsic profiles (intrinsic cell proportions) are regularized 
+#' based on the similarity between intrinsic and extrinsic profiles in order
+#' to maintain spatial consistency. This approach leverages both transcriptional 
+#' and spatial information. For more details, see Mañanes et al., 2023 and the
+#' Details section. 
+#' 
+#' The deconvolution process involves two main steps: predicting cell 
+#' proportions based on transcriptome using the trained neural network model, 
+#' and regularization of cell proportions based on the spatial location of each 
+#' spot. In the regularization step, a mirrored version of each spot is 
+#' simulated based on its N-nearest spots. We refer to these profiles as 
+#' 'extrinsic' profiles, whereas the transcriptional profiles of each spot are 
+#' called 'intrinsic' profiles. Extrinsic profiles are used to regularize 
+#' predictions based on intrinsic profiles. The rationale is that spots 
+#' surrounded by transcriptionally similar spots should have similar cell 
+#' compositions, and therefore predicted proportions can be smoothed to preserve
+#' their spatial consistency. On the other hand, spots surrounded by dissimilar 
+#' spots cannot be predicted by their neighbors, and thus they can only be 
+#' predicted by their own transcriptional profiles likely due to presenting very 
+#' specific cell compositions. 
+#' 
+#' Regarding the working os \pkg{SpatialDDLS}: first, extrinsic profiles are 
+#' simulated based on the N-nearest spots for each spot by summing their 
+#' transcriptomes. Distances between extrinsic and intrinsic profiles of each 
+#' spot are calculated so that similar/dissimilar spots are identified. These 
+#' two sets of transcriptional profiles are used as input for the trained neural
+#' network model, and according to the calculated distances, a weighted mean 
+#' between the predicted proportions for each spot is calculated. Spots with 
+#' distances between intrinsic and extrinsic profiles greater than 
+#' \code{alpha.cutoff} are not regularized, whereas spots with distances less 
+#' than \code{alpha.cutoff} contribute to the weighted mean. Weights are 
+#' calculated by rescaling distances less than \code{alpha.cutoff} between 0 
+#' and 0.5, so that the maximum extent to which a extrinsic profile can 
+#' modified the predictions based on intrinsic profiles is 0.5 (a regular 
+#' mean). For more details, see Mañanes et al., 2023. 
 #'
 #' This function requires a \code{\linkS4class{SpatialDDLS}} object with a
 #' trained deep neural network model (\code{\link{trained.model}} slot, and the
@@ -1122,28 +1144,27 @@ trainDeconvModel <- function(
 #'   deconvolute. If missing, all datasets will be deconvoluted.
 #' @param normalize Normalize data (logCPM) before deconvolution (\code{TRUE} by
 #'   default). 
-#' @param scaling How to scale data before training. It may be:
+#' @param scaling How to scale data before training. Options include
 #'   \code{"standardize"} (values are centered around the mean with a unit
 #'   standard deviation) or \code{"rescale"} (values are shifted and rescaled so
 #'   that they end up ranging between 0 and 1). If \code{normalize = FALSE},
 #'   data are not scaled.
 #' @param k.spots Number of nearest spots considered for each spot during 
-#'   regularization of predicted cell proportions and creation of extrinsic 
-#'   transcriptional profiles. The greater, the smoother proportions will be. 
-#'   4 by default. 
+#'   regularization and simulation of extrinsic transcriptional profiles. The 
+#'   greater, the smoother the regularization will be (4 by default). 
 #' @param pca.space Whether to use PCA space to calculate distances between 
-#'   intrinsic and extrinsic transcriptional profiles. \code{TRUE} by default. 
-#' @param pca.var Number between 0.2 and 1 indicating the cutoff of explained 
-#'   variance used to choose the number of PCs used if \code{pca.space == TRUE}. 
-#' @param metric Metric used to calculate distances between intrinsic and 
-#'   extrinsic transcriptional profiles. It may be \code{'euclidean'}, 
+#'   intrinsic and extrinsic transcriptional profiles (\code{TRUE} by default). 
+#' @param pca.var Threshold of explained 
+#'   variance (between 0.2 and 1) used to choose the number of PCs used if 
+#'   \code{pca.space == TRUE}. 
+#' @param metric Metric used to measure distance/similarity between intrinsic 
+#'   and extrinsic transcriptional profiles. It may be \code{'euclidean'}, 
 #'   \code{'cosine'} or \code{'pearson'} (\code{'euclidean'} by default).
-#' @param alpha.cutoff It determines the minimum distance by which the intrinsic 
-#'   proportions will start being regularized according to the extrinsic ones. 
-#'   It may be \code{'mean'} (distances lower than the mean distance are used) 
-#'   or \code{'quantile'} (distances lower than the \code{alpha.quantile} 
-#'   quantile are used). \code{'mean'} by default. See Mañanes et al., 2023 for
-#'   details. 
+#' @param alpha.cutoff Minimum distance for regularization. 
+#'   It may be \code{'mean'} (spots with transcriptional distances shorter than 
+#'   the mean distance of the dataset will be modified) or \code{'quantile'} 
+#'   (spots with transcriptional distances shorter than the 
+#'   \code{alpha.quantile} quantile are used). \code{'mean'} by default.
 #' @param alpha.quantile Quantile used if \code{alpha.cutoff == 'quantile'}. 
 #'   0.5 by default.  
 #' @param simplify.set List specifying which cell types should be compressed
@@ -1162,11 +1183,13 @@ trainDeconvModel <- function(
 #' @param verbose Show informative messages during the execution.
 #'
 #' @return \code{\linkS4class{SpatialDDLS}} object with a \code{deconv.spots}
-#'   slot. The output is a data frame with spot (\eqn{i}) as rows and cell types
-#'   (\eqn{j}) as columns. Each entry represents the predicted proportion of
-#'   \eqn{j} cell type in \eqn{i} spot. If \code{simplify.set} and/or
-#'   \code{simplify.majority} are provided, the \code{deconv.spots} slot will
-#'   contain a list with raw and simplified results.
+#'   slot. The output is a list containing 'Regularized', 'Intrinsic' and 
+#'   'Extrinsic' deconvoluted cell proportions, 'Distances' between intrinsic 
+#'   and extrinsic transcriptional profiles, and 'Weight.factors' with the 
+#'   final weights used to regularize intrinsic cell proportions. If 
+#'   \code{simplify.set} and/or \code{simplify.majority} are provided, 
+#'   the \code{deconv.spots} slot will contain a list with raw and simplified 
+#'   results.
 #'
 #' @export
 #'
@@ -1424,7 +1447,7 @@ deconvSpatialDDLS <- function(
         )
       )  
     }
-    if (sum(fill.features) == 0) {
+    if (sum(fill.features) != 0) {
       message(
         paste(
           "=== Setting", sum(fill.features),