From 023f75f7da5c504da14b65a73c7b53aa5627fc86 Mon Sep 17 00:00:00 2001
From: Lijiao Ning <lijiao.ning91@gmail.com>
Date: Tue, 23 Apr 2024 21:12:09 +0200
Subject: [PATCH] rephrase and reorder some codes

---
 docs/scRNAseq_basics/00_IOCsc_week0.md |  4 +--
 docs/scRNAseq_basics/01_IOCsc_week1.md | 10 ++++----
 docs/scRNAseq_basics/preprocessing.md  | 34 +++++++++++++-------------
 3 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/docs/scRNAseq_basics/00_IOCsc_week0.md b/docs/scRNAseq_basics/00_IOCsc_week0.md
index f0d664c8..a966ce60 100644
--- a/docs/scRNAseq_basics/00_IOCsc_week0.md
+++ b/docs/scRNAseq_basics/00_IOCsc_week0.md
@@ -50,8 +50,8 @@ To complete this week you'll need to :
 - [x] 4. Create a Seurat Object
 - [x] 5. Create an annotation table of zebrafish genes using `biomaRt`. 
 
-Add your RMD/QMD in your trello card.
+Add your RMD/QMD in your Trello card.
 
 **Thank you for your attention and see you next week :clap: :clap: :clap:**
 
-----
+----
\ No newline at end of file
diff --git a/docs/scRNAseq_basics/01_IOCsc_week1.md b/docs/scRNAseq_basics/01_IOCsc_week1.md
index 6d772bb1..6886534f 100644
--- a/docs/scRNAseq_basics/01_IOCsc_week1.md
+++ b/docs/scRNAseq_basics/01_IOCsc_week1.md
@@ -4,14 +4,14 @@ The preprocessing is the most important part of a single cell analysis because y
 can skew your result if you filter too much **or too little** and you must really
 understand what's going on these steps.
 
-Please go read the following pages to learn more about it : [Preprocessing.](preprocessing.md).
-
-The preprocesing is composed of : 
+The preprocessing is composed of:
 
 - Filtering of low quality barcodes
-- Barcode Normalization
+- Barcode normalization
 - Selection of most variable features
 
+Please read the [preprocessing](preprocessing.md) page to learn more about it.
+
 ---
 
 ![](../R-IOC/images/toolbox-do-it-yourself.png){: style="width:75px"} **Do it yourself!**
@@ -39,7 +39,7 @@ To complete this week you'll need to :
     each step of your thoughts. 
     In general, try to explain in your own words, each step of your analysis !
 
-Add your RMD/QMD in your trello card.
+Add your RMD/QMD in your Trello card.
 
 **Thank you for your attention and see you next week :clap: :clap: :clap:**
 
diff --git a/docs/scRNAseq_basics/preprocessing.md b/docs/scRNAseq_basics/preprocessing.md
index dc0c37ce..46f5c82f 100644
--- a/docs/scRNAseq_basics/preprocessing.md
+++ b/docs/scRNAseq_basics/preprocessing.md
@@ -2,7 +2,7 @@
 
 The pre-processing steps are used to clean the data in order not to distort
 the results of downstream analyses (clustering analysis, markers, differential
-expression analysis).
+expression analysis, *etc.*).
 
 ## Filter out low quality cells
 
@@ -36,7 +36,7 @@ A cell is generally considered to be in apoptosis when the transcriptome
 detects more than 20% of the genes in the MT genome. Some are more stringent
 in lowering this threshold to 10%.  
 
-``` r
+```r
 ## Retrieve genes from the MT genome using biomart
 genes_MT <- annotated_hg19$ensembl_gene_id[annotated_hg19$chromosome_name == "MT"]
 
@@ -55,7 +55,7 @@ VlnPlot(object = pbmc_small,
 
 <img src="../images/MitoGenes-1.png" style="display: block; margin: auto;" />
 
-``` r
+```r
 ## Graphical representation of QC
 ggplot(pbmc_small@meta.data,
        aes(y = nCount_RNA,
@@ -69,8 +69,9 @@ ggplot(pbmc_small@meta.data,
                         high = "red",
                         mid = "yellow",
                         midpoint = 20) +
-  ggtitle("QC plot", "Number of detected genes in function of number of UMI")+
-  labs(y = "Number of UMI per cell", x = "Number of detected genes by cell")
+  labs(x = "Number of detected genes by cell",
+       y = "Number of UMI per cell",
+       title = "QC plot", subtitle = "Number of detected genes in function of number of UMI")
 ```
 
 <img src="../images/MitoGenes-2.png" style="display: block; margin: auto;" />
@@ -87,7 +88,7 @@ We can also use a histogram representation. I recommend the three types of
 figures because depending on the dataset, the best method to identify
 outliers is different.
 
-``` r
+```r
 hist(pbmc_small$nCount_RNA,
      breaks = 100,
      xlab = "Number of UMI per cell",
@@ -98,7 +99,7 @@ abline(v = 10000, col = "red")
 
 <img src="../images/histQC-1.png" style="display: block; margin: auto;" />
 
-``` r
+```r
 hist(pbmc_small$nFeature_RNA,
      breaks = 100,
      xlab = "Number of detected genes by cell",
@@ -128,14 +129,12 @@ We will remove all the cells :
 - that detect less than 300 genes or more than 2300.
 - whose percentage of expressed genes of the MT genome exceeds 10%
 
-``` r
+```r
 ## Filtering SeuratObject
 pbmc_small <- subset(pbmc_small,
                      percent_mito < 10 &
-                       nCount_RNA > 650 &
-                       nCount_RNA < 10000 &
-                       nFeature_RNA > 300 &
-                       nFeature_RNA < 2300)
+                     (nCount_RNA > 650 & nCount_RNA < 10000) &
+                     (nFeature_RNA > 300 & nFeature_RNA < 2300))
 
 ## Plot
 ggplot(pbmc_small@meta.data,
@@ -148,13 +147,14 @@ ggplot(pbmc_small@meta.data,
                         high = "red",
                         mid = "yellow",
                         midpoint = 20) +
-  ggtitle("QC plot after filtering", "Number of detected genes in function of number of UMI")+
-  labs(y = "Number of UMI per cell", x = "Number of detected genes by cell")
+  labs(x = "Number of detected genes by cell",
+       y = "Number of UMI per cell",
+       title = "QC plot after filtering", subtitle = "Number of detected genes in function of number of UMI")
 ```
 
 <img src="../images/QCFilter-1.png" style="display: block; margin: auto;" />
 
-``` r
+```r
 ## Update object in R console
 pbmc_small
 ```
@@ -197,7 +197,7 @@ median of the library size (= total number of UMIs per cell, = `nCount_RNA`
 in `meta.data`). If the scale factor is equal to 1e6 then we would get
 log2(CPM+1). *CPM : Count Per Million*.
 
-``` r
+```r
 ## Inter-cell normalization
 pbmc_small <- NormalizeData(pbmc_small,                                   #SeuratObject
                             assay = "RNA",                                #Assay to use
@@ -230,7 +230,7 @@ relation between the expression mean and the variance of each gene.
 With the `nfeatures` parameter we retrieve the 2000 most variable genes
 according to the vst method.
 
-``` r
+```r
 pbmc_small <- FindVariableFeatures(pbmc_small,                 #SeuratObject
                                    selection.method = "vst",   #Method
                                    nfeatures = 2000)           #Top HVG (Highly Variable Gene), default value