-
Notifications
You must be signed in to change notification settings - Fork 0
/
help.R
1640 lines (1475 loc) · 132 KB
/
help.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
###############################Example files####################################
# Help-page HTML fragment listing the downloadable example input files.
# The links are grouped under four headings: a scRNA-seq count matrix,
# the three 10x files (barcodes/features/matrix), the GRN analysis inputs
# (Seurat object and pyscenic loom output), and the scATAC-seq inputs
# (arrow file and peak set). All hrefs point below "example_files/".
examples_help <- HTML(
'<h4>Example matrix input file for scRNA-seq:</h4>
<li><a href="example_files/exampleMatrix.zip" download> PBMC 3K Cell-Gene count matrix (*.txt) </a></li>
<h4>Example 10x input files for scRNA-seq:</h4>
<li><a href="example_files/barcodes.tsv.gz" download> PBMC 3K barcodes file (*.tsv.gz) </a></li>
<li><a href="example_files/features.tsv.gz" download> PBMC 3K features file (*.tsv.gz) </a></li>
<li><a href="example_files/matrix.mtx.gz" download> PBMC 3K matrix file (*.mtx.gz) </a></li>
<h4>Example files for GRN analysis:</h4>
<li><a href="example_files/processed_seurat_object-2021-12-19.zip" download> PBMC 3K seurat object (*.RDS) </a></li>
<li><a href="example_files/auc.hg19.zip" download> Pyscenic output file (*.loom) </a></li>
<h4> Example files for scATAC-seq: </h4>
<li><a href="example_files/arrowFile.zip" download> PBMC arrow file (*.arrow) </a></li>
<li><a href="example_files/PBMCs_human_signac_peaks.zip" download> PBMC peakset file (*.bed) </a></li>
')
###########################Upload###############################################
# Introductory banner for the "Data upload" help section: a single centred
# <h4> stating that the tab accepts scRNA-seq or scATAC-seq datasets in
# several input formats.
file_upload_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">The <b>Data upload tab</b> enables
the user to upload scRNA-seq or scATAC-seq datasets to SCALA,
in order to initialize single-cell analysis.<br> SCALA provides the option to upload different file input formats. </h4>')
# Help text for uploading a cell-gene count matrix (scRNA-seq).
# Layout: a 4-column panel with the upload-form screenshot (Figure 1) next
# to an 8-column panel documenting (1) the form fields, (2) the
# "Load example" button and (3) the "Export .RDS" button.
file_upload_txt <- HTML('<div class="col-md-4 scrollable">
<img src = "images/help_page/DATA_INPUT_scRNA_count_matrix.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 1: </b> The File Upload form for cell-gene count matrices </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3> 1. Upload a cell-gene count matrix </h3>
<p>
<ol>
<li> Project name: User defined project name.
<li> File Upload: Click the <b>Browse</b> button of the upload form to select a single-cell RNA-seq gene/feature by cell/barcode matrix.<br>
<b>Notes: The file should not exceed 500 MB.</b>
<li> Gene and cell filtering parameters:<br>
<ul>
<li> The first sliding bar will subset the count matrix in order to include features/genes detected in at least this many cells.
<li> The second sliding bar will subset the count matrix in order to include cells where at least this many features are detected.
</ul>
<li> Select organism: <br>
<ul>
<li> If the counting procedure was accomplished by using UCSC mm9 or mm10 genome builds as a reference, the user should choose "Mouse".
<li> If the counting procedure was accomplished by using UCSC hg19 or hg38 genome builds as a reference, the user should choose "Human".
</ul>
</ol>
</p>
<hr>
<h3>2. Load the example count matrix </h3>
<p>
By pressing the button <b>Load example</b> a scRNA-seq human example of Peripheral Blood Mononuclear Cells (PBMC), freely available from 10X Genomics, will be loaded.
</p>
<hr>
<h3>3. Export working object as .RDS file </h3>
<p>
At any point of the analysis the button <b>Export .RDS</b> exports a scRNA-seq Seurat Object as an RDS file compatible with the R language.
</p>
</div>
')
# Help text for uploading 10x "cellranger count" output (scRNA-seq).
# Layout: a 4-column panel with the upload-form screenshot (Figure 2) next
# to an 8-column panel documenting (1) the three required files and the
# filtering/organism fields, (2) the "Load example" button and (3) the
# "Export .RDS" button.
file_upload_10x <- HTML('<div class="col-md-4 scrollable">
<img src = "images/help_page/DATA_INPUT_scRNA_10x.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 2: </b> The File Upload form for 10x input files </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3> 1. Upload 10x files </h3>
<p>
<ol>
<li> Project name: User defined project name.
<li> File Upload: Click the <b>Browse</b> buttons of the upload form to select "cellranger count" single-cell RNA-seq files
<ul>
<li> A gzipped tsv file that contains only detected (filtered by cellranger count pipeline) cellular barcodes. The file should have the name "barcodes.tsv.gz".
<li> A gzipped tsv file of features (genes) that correspond to row indices. Feature ID, feature name and feature type (Gene expression) are stored in the first,
second, and third column of the file, respectively. The file should have the name "features.tsv.gz".
<li> A gzipped Market Exchange Format (MEX) feature-barcode count matrix. The file should have the name "matrix.mtx.gz". <br>
<b>Notes: The files should not exceed 500 MB in total. Additionally, the files should be named as described above in order to be uploaded successfully.</b>
</ul>
<li> Gene and cell filtering parameters:<br>
<ul>
<li> The first sliding bar will subset the count matrix in order to include features/genes detected in at least this many cells.
<li> The second sliding bar will subset the count matrix in order to include cells where at least this many features are detected.
</ul>
<li> Select organism: <br>
<ul>
<li> If the counting procedure was accomplished by using UCSC mm9 or mm10 genome builds as a reference, the user should choose "Mouse".
<li> If the counting procedure was accomplished by using UCSC hg19 or hg38 genome builds as a reference, the user should choose "Human".
</ul>
</ol>
</p>
<hr>
<h3>2. Load the example 10x files </h3>
<p>
By pressing the button <b>Load example</b> a scRNA-seq human example of Peripheral Blood Mononuclear Cells (PBMC), freely available from 10X Genomics, will be loaded.
</p>
<hr>
<h3>3. Export working object as .RDS file </h3>
<p>
At any point of the analysis the button <b>Export .RDS</b> exports a scRNA-seq Seurat Object as an RDS file compatible with the R language.
</p>
</div>
')
# Help text for uploading an RDS file that contains a Seurat object.
# Layout: a 4-column panel with the screenshot next to an 8-column panel
# documenting (1) the required/optional contents checked after upload,
# (2) switching the active assay and (3) the "Export .RDS" button.
# "Required" and "Optional" are sibling <li> items of one outer <ul>, each
# carrying its own nested <ul>.
file_upload_RDS <- HTML('<div class="col-md-4 scrollable">
<img src = "images/help_page/DATA_INPUT_scRNA_RDS.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure: </b> An RDS file upload containing a seurat object. </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3> 1. Upload an RDS file </h3>
<p>
If a processed seurat object is available, the user can upload it to SCALA and proceed with visualization or analysis tasks. After the
uploading procedure is complete SCALA performs the following checks in the RDS file:
<ul>
<li> Required:
<ul>
<li> the raw count matrix stored in slot "counts" </li>
<li> metadata table containing the columns "nFeature_RNA", "nCount_RNA" </li>
</ul>
</li>
<li> Optional:
<ul>
<li> Column "percent.mt" in metadata table </li>
<li> Normalized counts in slot "data" </li>
<li> Scaled counts in slot "scale.data" </li>
<li> Highly variable genes accessible through the VariableFeatures() function</li>
<li> PCA or other embeddings stored in slot "reductions" </li>
<li> Clustering results stored in the metadata column "seurat_clusters"</li>
<li> Marker genes saved in the slot "misc" under the name "markers" </li>
</ul>
</li>
</ul>
</p>
<h3> 2. Change assay </h3>
<p>
When the upload is successful, if more than one assay is available, the user can select which one will be active. This is often
useful in objects created by integrating two or more seurat objects.
</p>
<h3> 3. Export working object as .RDS file </h3>
<p> At any point of the analysis the button <b>Export .RDS</b> exports a scRNA-seq Seurat Object as an RDS file compatible with the R language. </p>
</div>
')
# Help text for uploading a scATAC-seq arrow file.
# Layout: a 4-column panel with the upload-form screenshot (Figure 3) next
# to an 8-column panel documenting (1) the form fields (project name, file,
# organism/genome build, CPU threads) and (2) the "Load example" button.
# The string is single-quoted, so the apostrophe in "SCALA's" is escaped.
file_upload_arrow <- HTML('<div class="col-md-4 scrollable">
<img src = "images/help_page/DATA_INPUT_scATAC-seq.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 3: </b> The File Upload form for arrow files </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3> 1. Upload an arrow file </h3>
<p>
<ol>
<li> Project name: User defined project name.
<li> File Upload: Click the <b>Browse</b> button of the upload form to select a single-cell ATAC-seq arrow file. An arrow file is a file type that stores all of the scATAC-seq data
associated with an individual sample. Arrow files can be created by coupling scATAC-seq fragment files generated by cellranger-atac, and create_arrow_file.R Rscript (provided), as described in
SCALA\'s github page. The particular operation should be applied locally.<br>
<b>Notes: The file should not exceed 2 GB.</b>
<li> Select organism: <br>
<ul>
<li> If the counting procedure was accomplished by using the UCSC mm10 genome build as a reference, the user should choose "mm10".
<li> If the counting procedure was accomplished by using UCSC hg19 or hg38 genome builds as a reference, the user should choose "hg19" or "hg38".
</ul>
<li> Threads to be used: The number of CPU threads to be used during the analysis.
</ol>
</p>
<hr>
<h3>2. Load an example arrow file </h3>
<p>
By pressing the button <b>Load example</b> a scATAC-seq human example of Peripheral Blood Mononuclear Cells (PBMC), freely available from 10X Genomics, will be loaded.
</p>
</div>
')
# Help text explaining the confirmation prompt shown when a new dataset is
# uploaded while another one is loaded: "Yes" discards the current progress
# (irreversible), "No" keeps the current project.
file_upload_tab_new_project <- HTML('<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px;">
<h4 style = "line-height: 1.5;">
When you try to upload a new dataset at the same time that another one is already loaded, you will be prompted to discard the current working dataset. If you select <b>Yes</b> your
progress will be deleted. Note that this is an irreversible action. After that you can start a new project by submitting the new input files.<br>
If you wish to continue working on your current project select <b>No</b>.
</h4>
</div>')
# Help text describing the scRNA-seq cell metadata table that appears after
# a successful upload: the meaning of its first five columns, the fact that
# later analysis steps add columns, and the "Save table" button. A
# full-width screenshot (Figure 4) follows the description.
# NOTE(review): the image file name is spelled "Metedata" — it is a path and
# is kept as-is; confirm it matches the file on disk before renaming.
file_upload_metadata_RNA <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> scRNA-seq metadata table </u></h3>
<p>
If the data upload was successful, a scRNA-seq cell metadata table will appear.
Project name, number of Unique Molecular Identifiers (UMIs) per cell, number of detected features/genes per cell, percentage of mitochondrial RNA (percent.mt)
and cell id are stored in the 1st, 2nd, 3rd, 4th and 5th column respectively. Additional columns will be created during the next steps of the analysis, and the cell
metadata table will be updated automatically. The user is also able to download the metadata table using the <b>Save table</b> button.
</p>
</div>
<div class="col-md-12 scrollable">
<img src = "images/help_page/DATA_INPUT_scRNA_Metedata_table.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 4: </b> RNA metadata table </figcaption>
</div>
')
# Help text describing the scATAC-seq cell metadata table that appears after
# a successful upload: the meaning of its fourteen columns, the fact that
# later analysis steps add columns, and the "Save table" button. A
# full-width screenshot (Figure 5) follows the description.
file_upload_metadata_ATAC <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> scATAC-seq metadata table </u></h3>
<p>
If the data upload was successful, a scATAC-seq cell metadata table will appear. Project name, per-cell TSS enrichment, per-cell Unique Molecular Identifiers (UMIs) in Transcription Start Sites (TSSs),
per-cell UMIs in promoters, per-cell UMIs in ENCODE black listed regions, per-cell fraction of accessible fragments that overlap promoters (PromoterRatio),
information about high quality cell (PassQC), per-cell nucleosome ratio, per-cell number of fragments with size greater than 294 bp (nMultiFrags), per-cell
number of fragments with size less than 147 bp (nMonoFrags), per-cell number of fragments (nFrags), per-cell number of fragments with size between 147 bp
and 294 bp (diFrags), per-cell ratio of fragments in ENCODE black listed regions, and cell id are stored in the 1st, 2nd, 3rd, 4th, 5th, 6th, 7th, 8th, 9th,
10th, 11th, 12th, 13th, and 14th columns respectively. Additional columns will be created during the next steps of the analysis, and the cell metadata table
will be updated automatically. The user is also able to download the metadata table using the <b>Save table</b> button.
</p>
</div>
<div class="col-md-12 scrollable">
<img src = "images/help_page/DATA_INPUT_scATAC-seq_Metadata_table_merged.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 5: </b> ATAC metadata table </figcaption>
</div>
')
#######################################QC#######################################
# Introductory banner for the "Quality control" help section: a single
# centred <h4> stating that the tab produces QC plots for scRNA-seq and
# scATAC-seq datasets.
qc_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">The <b>
Quality control tab</b> enables the generation of Quality Control (QC) plots for scRNA-seq and scATAC-seq datasets,<br>
in order to examine the quality of the single-cell experiment. </h4>')
# Help text for the scRNA-seq quality-control parameters.
# A full-width screenshot (Figure 6) is followed by a panel describing
# (1) the QC plots shown before filtering and the metrics they cover, and
# (2) the three cell filters the user can set (minimum features, maximum
# features, mitochondrial percentage).
rna_qc <- HTML('
<div class="col-md-12 scrollable">
<img src = "images/help_page/QC_pre_params.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 6: </b> Quality control parameters and plots before filtering </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Quality control parameters </u></h3>
<p>
<ol>
<li> Display quality control plots before filtering: The user is able to visualize QC plots, before the application of any cell-specific filter.<br>
The particular visualization allows the user to explore cell count metrics and filter-out low-quality cells and non-informative genes in the next steps of this analysis.
The QC metrics included in this exploration are:
<ul>
<li> The number of detected features in each cell. Low-quality barcodes or empty droplets will exhibit low number of features, while multiplets may have a very high number of genes detected.
<li> The total number of UMIs detected in each cell. This metric correlates strongly with the feature-per-cell metric.
<li> The percentage of mitochondrial UMIs. Low-quality barcodes and dying cells contain high number of mitochondrial reads.
<li> The correlation between detected features and total counts per-cell. The particular metrics should exhibit high correlation.
<li> The correlation between mitochondrial counts and total counts per-cell. The particular metrics should exhibit negligible correlation.
<li> The total number of non-filtered cells.
</ul>
<li> Filter out low quality cells: After exploring the QC metrics of the unfiltered cells, the user can apply the following filtering criteria:
<ul>
<li> Minimum features detected: Filter out all cells with less than the user-defined number of detected features.
<li> Maximum features detected: Filter out all cells with more than the user-defined number of detected features.
<li> Mitochondrial %: Filter out cells that have greater than the user-defined percentage of mitochondrial counts.
</ul>
</ol>
</p>
</div>
')
# Help text for the scRNA-seq post-filtering QC plots: a full-width
# screenshot (Figure 7) followed by a short panel explaining that the same
# metrics are shown after filtering so the criteria can be readjusted.
rna_qc_pf <- HTML('
<div class="col-md-12 scrollable">
<img src = "images/help_page/QC_post.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 7: </b> Quality control plots post filtering </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Inspection of filtering criteria </u></h3>
<p>
The particular visualization allows the user to explore cell count metrics after the filtering procedure. The user is able to readjust the defined criteria if the results
are not satisfying. The QC metrics included in this figure are described in the previous tab.
</p>
</div>
')
# Help text for the scATAC-seq quality-control plots.
# A full-width screenshot (Figure 8) is followed by a panel that (1) explains
# that soft filtering happens locally in create_arrow_file.R via the
# --minTSS / --minFrags options (defaults stated in the text: 4 and 1000),
# and (2) lists the three plots shown (violin, ridge, scatter).
atac_qc <- HTML('
<div class="col-md-12 scrollable">
<img src = "images/help_page/QC_all_ATAC.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 8: </b> Quality control plots after soft filtering </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Inspection of filtering criteria for ATAC data </u></h3>
<p>
<h4>- Display soft filtered quality control plots: Visualization of soft filtered QC plots. Soft cell-filtering has been applied during the arrow file creation,
locally, by using the create_arrow_file.R Rscript (provided in github). The filtering parameters and their default values are:</h4>
<ul>
<li> "-t" or "--minTSS": The minimum numeric transcription start site (TSS) enrichment score required for a cell to pass filtering for use in downstream analyses. The default value will be set to 4.
<li> "-m" or "--minFrags": The minimum number of mapped ATAC-seq fragments required per cell to pass filtering for use in downstream analyses. The default value will be set to 1000.
</ul>
Note: If the user wants to adjust additional parameters included in the cell metadata, the ArchR function createArrowFiles() should be used in an R environment.
</p>
<p>
<h4>- The particular visualization allows the user to explore cell metrics after the soft-filtering procedure. The user is able to readjust the defined criteria if the
results are not satisfying, by using the create_arrow_file.R Rscript and by tweaking the "--minTSS" and "--minFrags" parameters. The QC metrics included in this illustration are:</h4>
<ul>
<li> Violin plot of the per-cell TSS enrichment scores.
<li> Ridge plot of the per cell logged unique nuclear fragments number.
<li> Scatter plot of the logged unique nuclear fragments number versus the Transcription Start Site (TSS) enrichment score. Dashed lines indicate the thresholds used.
</ul>
</p>
</div>
')
############################Normalization#######################################
# Introductory banner for the "Data normalization" help section: a single
# centred <h4> summarising cell-specific normalization, gene-specific
# scaling and detection of highly variable features for scRNA-seq data.
norm_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">The <b>DATA NORMALIZATION</b> tab enables the user to
perform cell-specific normalization and gene-specific scaling of scRNA-seq count data. The aforementioned procedures are essential in order to perform the downstream steps of
the analysis, including dimensionality reduction and differential expression. After these steps, the features that exhibit the higher variability in the dataset are detected.
</h4>')
# Help text for the scRNA-seq normalization and scaling parameters.
# Layout: a 4-column panel with the parameter-form screenshot (Figure 9)
# next to an 8-column panel with three sections: global-scaling
# normalization, selection of highly variable features (vst, mean-variance,
# dispersion) and data scaling with optional regression of metadata values.
rna_normalization_param <- HTML('<div class="col-md-4 scrollable">
<img src = "images/help_page/DATA_NORMALIZATION_AND_SCALING_parameters.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 9: </b> Parameters for the normalization and scaling procedure in RNA seq data </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u>Normalization</u></h3>
<li> A global-scaling normalization is applied. The particular methodology normalizes the per-cell gene expression counts by the total
cell counts, multiplies this value by a scale factor (10,000 by default), and finally performs log-transformation. The user is able to
alter the scaling factor.
<h3><u> Identification of highly variable features </u></h3>
Detection of a set of genes that show high cell-to-cell variation in the scRNA-seq count matrix. One of the following methods can be selected:
<ul>
<li> Variance Stabilizing Transformation (vst) method. The particular method fits a line to the relationship of logged variance and logged
mean using local polynomial regression. Consequently, standardization of feature values using the observed mean and expected variance is performed.
Feature variance is finally calculated on standardized values, after clipping to a maximum. A fixed number of variable features is returned
(default: 2,000 features. Recommended values range: 1000 - 8000).
<li> Mean-Variance method. The particular method uses a function to calculate average gene counts and gene dispersions. All genes are separated into 20
bins according to their average counts. Finally, dispersion z-scores are calculated in each gene group.
<li> Dispersion method. Feature selection according to the highest dispersion values.
</ul>
<h3><u> Scaling the data </u></h3>
<p>
A linear transformation that shifts the counts of each feature, so that the mean counts across cells is 0, and the variance across cells is 1. This step ensures that
highly-expressed genes will not introduce biases during the downstream analysis. Additionally, in order to remove unwanted sources of variation, the user is able to
select which metadata values to regress out during the scaling procedure. Typically, mitochondrial percentage is regressed out.
</p>
</div>
')
# Help text for the most-variable-genes plot: a full-width screenshot
# (Figure 10) followed by a panel explaining the scatter plot axes
# (standardized variance or dispersion versus average expression) and that
# highly variable genes are drawn as red dots.
rna_normalization_output <- HTML('
<div class="col-md-12 scrollable">
<img src = "images/help_page/DATA_NORMALIZATION_AND_SCALING_mvgs.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<br>
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 10: </b> Most variable genes </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Exploration of MVGs </u></h3>
<p>
The particular visualization is a scatter plot that depicts the standardized variance versus the average expression of all features (vst method applied).
If one of the other two methods is selected dispersion values are shown in the Y axis. The number of highly variable genes is also reported (red dots).
</p>
</div>
')
##########################PCA/LSI###############################################
# Introductory banner for the "PCA/LSI" help section: a single centred <h4>
# stating that PCA is applied to scRNA-seq data and Latent Semantic Indexing
# to scATAC-seq data.
pca_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">The <b>PCA/LSI</b> tab enables the user
to perform Principal Component Analysis (PCA) to scRNA-seq datasets and Latent Semantic Indexing analysis to scATAC-seq datasets.
</h4>')
# Help text for choosing the number of principal components.
# Two screenshots (Figure 11) are followed by a panel that lists the two
# available options (automatic identification of the optimal number of PCs,
# or manual choice from the elbow plot) and then describes the two plots
# shown in the illustration. The ordered list is closed before the
# descriptive paragraph.
pca_optimal_pcs <- HTML('
<div class="col-md-12 scrollable">
<img src = "images/help_page/PCA_selection.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<img src = "images/help_page/PCA_slow_results.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 11:</b> Determination of optimal number of principal components </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3>The user has two options during this analysis step:</h3>
<ol>
<li> Enable the automatic identification of the optimal number of Principal Components (PCs) using 1-fold SVA-CV. This option is significantly slower.
The optimal number of PCs is indicated by the red dotted line.
<li> Perform PCA without automatic identification of the optimal number of Principal Components (PCs). The user will then decide about the dimensionality
of the dataset (the number of most informative PCs), based on the generated "elbow" plot.
</ol>
<p>
In the particular illustration,<br> (a) an elbow plot depicting the ranking of PCs based on the percentage of variance explained by each of them is illustrated,
as also<br> (b) a scatter plot of cells in 2D PCA space, using the first two PCs.
</p>
</div>
')
# Help text for exploring a principal component of interest: a full-width
# screenshot (Figure 12) followed by a panel describing the loading-score
# plot of the top genes and the heatmap of scaled counts across cells.
pca_explore_pcs <- HTML('
<div class="col-md-12 scrollable">
<img src = "images/help_page/PCA_exploration.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 12:</b> Exploration of principal components </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
The particular visualization depicts<br> (a) the loading scores of the top genes of a PC of interest (top 30 features for the particular example), and<br>
(b) a heatmap of scaled counts of the top loadings of the PC of interest, across cells.
</p>
</div>
')
# Help text for the scATAC-seq LSI options.
# Layout: a 6-column panel with the options screenshot (Figure 13) next to a
# 6-column panel describing the iterative LSI procedure and the three
# parameters used to identify low-resolution clusters (number of variable
# features, number of LSI dimensions, cluster resolution) with the defaults
# stated in the text.
pca_lsi <- HTML('
<div class="col-md-6">
<img src = "images/help_page/PCA_lsi.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 13:</b> LSI options </figcaption>
</div>
<div class="col-md-6" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
In SCALA, LSI is run in an iterative manner (number of iterations). A first LSI transformation run is applied using the most accessible features.
This procedure identifies lower resolution clusters that are not batch confounded. Consequently, the average accessibility for each of these clusters is calculated across all features.
Finally, the most variable features are identified across the low resolution clusters, and are used as input for the next LSI iteration.
The parameters for identifying low resolution clusters are:
<ul>
<li> Number of variable features. Defaults to 25,000 features.
<li> Number of LSI dimensions to use. Defaults to 30 dimensions.
<li> Cluster resolution. Defaults to 1.
</ul>
</p>
</div>
')
#########################Clustering#############################################
# Introductory banner for the "Clustering" help section: a single centred
# <h4> stating that the tab performs graph-based clustering on scRNA-seq and
# scATAC-seq datasets.
clustering_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">
The <b>CLUSTERING</b> tab enables the user to perform graph-based clustering in scRNA-seq and scATAC-seq datasets, in order to define cell types and cellular states.
</h4>')
# Help text for the scRNA-seq clustering parameters.
# Layout: a 4-column panel with the options screenshot (Figure 14) next to an
# 8-column panel describing the two clustering steps: construction of the
# shared-nearest-neighbour graph and community detection with the Louvain
# algorithm, each with its user-settable parameters and the defaults stated
# in the text. The string is single-quoted, so the apostrophe in
# "Communities'" is escaped.
clustering_rna_input <- HTML('
<div class="col-md-4 scrollable">
<img src = "images/help_page/Clustering Parameters.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 14:</b> Clustering options </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3><u>Clustering steps</u></h3>
<ol>
<li> Construction of the shared nearest neighbour (SNN) graph<br>
Initially, cells are embedded in a K-nearest neighbor (KNN) graph structure based on Euclidean distances in the PCA space.
Cells that exhibit similar gene expression profiles are connected with edges.
The user can define (a) the maximum number of neighbors of each cell (defaults to 20), as also (b) the number of principal components
to use (defaults to 10).
<li> Communities\' detection (Louvain algorithm)<br>
The formed graph of the previous step is partitioned into highly interconnected communities using the Louvain algorithm.
The user can define (a) the desired clustering resolution (defaults to 0.5), as also (b) the number of principal components to
use (defaults to 10). Higher values of these parameters will result in an increased formation of communities/cell clusters.
</ol>
</p>
</div>
')
# Help text for the scRNA-seq clustering outputs.
# Three full-width panels, each with a screenshot, a caption and a short
# description:
#   * Figure 15 - downloadable table of clusters (name, cell count, percentage)
#   * Figure 16 - stacked barplot of per-cluster cell percentages
#   * Figure 17 - the SNN graph used for clustering
clustering_rna_output <- HTML('<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/Clustering Results_1.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 15:</b> Table of the identified clusters </figcaption>
<br>
<p>
The clustering results can be explored by downloading the respective data table, which encapsulates information regarding the cluster name,
the number of cells in each cluster, and the percentage of total cells that is included in each cluster. The particular information is stored
in the 1st, 2nd and 3rd columns of the aforementioned data table respectively.
</p>
<br>
</div>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/Clustering Results_2.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 16:</b> Barplot of the identified clusters </figcaption>
<br>
<p>
The clustering results are also depicted in a stacked barplot of percentages of cells in each of the identified clusters.
</p>
<br>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/Clustering SNN_graph.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 17:</b> The shared nearest neighbor graph (SNN) used in clustering </figcaption>
<br>
<p>
The user is also able to explore the formation of the SNN graph that led to the identification of the final cluster set.
</p>
<br>
</div>
')
# Help text for the scATAC-seq clustering inputs.
# Layout: a 4-column panel with the options screenshot (Figure 18) next to a
# 5-column panel stating that the scRNA-seq methodology is reused and listing
# the two user-defined options (LSI dimensions and clustering resolution).
clustering_atac_input <- HTML('
<div class="col-md-4 scrollable">
<img src = "images/help_page/ClusteringOptionsATAC.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 18:</b> Clustering options </figcaption>
</div>
<div class="col-md-5" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3>scATAC-seq clustering follows the same methodology as described for scRNA-seq.</h3><br>
The user is able to define<br> (a) the number of LSI dimensions to use
(defaults to 30), as well as<br> (b) the clustering resolution (defaults to 0.6).
</p>
</div>
')
# Help text for the scATAC-seq clustering outputs.
# Two full-width panels, each with a screenshot, a caption and a description:
#   * Figure 19 - downloadable table of clusters (name, cell count, percentage)
#   * Figure 20 - barplot of per-cluster cell percentages
# The Figure 20 caption names the barplot shown by ClusteringBarplotATAC.PNG,
# matching the equivalent scRNA-seq caption (Figure 16).
clustering_atac_output <- HTML('<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/ClusteringResultsATAC.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 19:</b> Table of the identified clusters </figcaption>
<br>
<p>
The clustering results can be explored by downloading the respective data table, which encapsulates information regarding the cluster name,
the number of cells in each cluster, and the percentage of total cells that is included in each cluster. The particular information is stored
in the 1st, 2nd and 3rd columns of the aforementioned data table respectively.
</p>
<br>
</div>
<br>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/ClusteringBarplotATAC.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 20:</b> Barplot of the identified clusters </figcaption>
<br>
<p>
The clustering results are also depicted in a barplot of percentages of cells in each of the identified clusters.
</p>
<br>
</div>
')
################################Umap############################################
# Intro banner for the ADDITIONAL DIMENSIONALITY REDUCTION METHODS help tab.
# A single centered, bordered <h4> summarising that the tab applies nonlinear
# dimensionality reduction to scRNA-seq and scATAC-seq data.
umap_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">
The <b>ADDITIONAL DIMENSIONALITY REDUCTION METHODS</b> tab enables the user to perform nonlinear dimensionality
reduction methodologies to scRNA-seq and scATAC-seq, in order to uncover patterns of cell similarity and differentiation.
</h4>')
# Help text for the scRNA-seq nonlinear dimensionality reduction inputs.
# Layout: a 4-column panel with the parameters screenshot (Figure 21) next to
# an 8-column panel holding an ordered list of three groups:
#   1. user-defined parameters (principal components, output dimensions)
#   2. available methods (UMAP, tSNE, Diffusion Maps, PHATE)
#   3. display settings (plot type, dimensions, colour, size, opacity, border)
# The <li> items rely on HTML's optional end tags and are not closed explicitly.
umap_rna_input <- HTML('
<div class="col-md-4 scrollable">
<img src = "images/help_page/UMAPparameters.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 21:</b> Options </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3><u>Options for non linear dimensionality reductions</u></h3>
<ol>
<li> User-defined parameters for nonlinear dimensionality reduction application
<ul>
<li> Number of principal components to use. The user is able to define the number of the most informative principal components to be used for nonlinear dimensionality reduction.
<li> Number of dimensions to fit output. The user is able to define the number of output dimensions for each of the available nonlinear dimensionality reduction methods.
</ul>
<li> Available dimensionality reduction methodologies. The user can choose among:
<ul>
<li> Uniform Manifold Approximation and Projection (UMAP)
<li> t-distributed stochastic neighbor embedding (tSNE)
<li> Diffusion Maps
<li> PHATE
</ul>
<li> Display settings
<ul>
<li> Plot type. The user is able to choose which of the available nonlinear dimensionality reduction spaces to visualize. Defaults to pca.
<li> Dimensions. The number of dimensions to be plotted. Defaults to 2D.
<li> Color by. Apply different coloring based on the available cell metadata columns. Defaults to orig_ident.
<li> Size. The size of the plotted cells/dots. Defaults to 5.
<li> Opacity. Define the level of the transparency of the plotted cells/dots. Defaults to 1.
<li> Border width. The dot/cell perimeter thickness. Defaults to 0.5.
</ul>
</ol>
</p>
</div>
')
# Help text for the scRNA-seq nonlinear dimensionality reduction output.
# One full-width panel with an example screenshot (Figure 22) and a paragraph
# explaining how to read the scatter plot (one dot per cell, 2D UMAP example).
umap_rna_output <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/UMAP.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 22:</b> Output example </figcaption>
<br>
<p>
Each nonlinear dimensionality reduction space is visualized using a scatter plot. Each dot represents a cell, and each axis a nonlinear dimension.
The particular illustration hosts a UMAP visualization in 2D.
</p>
<br>
</div>
')
# Help text for the scATAC-seq nonlinear dimensionality reduction inputs.
# Layout: a 4-column panel with the parameters screenshot (Figure 23) next to
# an 8-column panel holding an ordered list of three groups:
#   1. user-defined parameters (LSI dimensions, output dimensions)
#   2. available methods (UMAP, tSNE)
#   3. display settings (plot type, dimensions, colour, size, opacity, border)
# NOTE(review): the screenshot path (UMAPparameters.PNG) and the whole
# "Display settings" list are identical to umap_rna_input, including
# "Defaults to pca" - confirm that default also applies to the LSI-based
# scATAC-seq workflow.
umap_atac_input <- HTML('
<div class="col-md-4 scrollable">
<img src = "images/help_page/UMAPparameters.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 23:</b> Options </figcaption>
</div>
<div class="col-md-8 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3><u>Options for non linear dimensionality reductions</u></h3>
<ol>
<li> User-defined parameters for nonlinear dimensionality reduction application
<ul>
<li> Number of LSI dimensions to use. The user is able to define the number of the most informative LSI dimensions to be used for nonlinear dimensionality reduction.
<li> Number of dimensions to fit output. The user is able to define the number of output dimensions for each of the available nonlinear dimensionality reduction methods.
</ul>
<li> Available dimensionality reduction methodologies. The user can choose among:
<ul>
<li> Uniform Manifold Approximation and Projection (UMAP)
<li> t-distributed stochastic neighbor embedding (tSNE)
</ul>
<li> Display settings
<ul>
<li> Plot type. The user is able to choose which of the available nonlinear dimensionality reduction spaces to visualize. Defaults to pca.
<li> Dimensions. The number of dimensions to be plotted. Defaults to 2D.
<li> Color by. Apply different coloring based on the available cell metadata columns. Defaults to orig_ident.
<li> Size. The size of the plotted cells/dots. Defaults to 5.
<li> Opacity. Define the level of the transparency of the plotted cells/dots. Defaults to 1.
<li> Border width. The dot/cell perimeter thickness. Defaults to 0.5.
</ul>
</ol>
</p>
</div>
')
# Help text for the scATAC-seq nonlinear dimensionality reduction output.
# One full-width panel with an example screenshot (Figure 24) and a paragraph
# explaining how to read the scatter plot (one dot per cell, 2D UMAP example).
umap_atac_output <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<img src = "images/help_page/UMAPATAC.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 24:</b> Output example </figcaption>
<br>
<p>
Each nonlinear dimensionality reduction space is visualized using a scatter plot. Each dot represents a cell, and each axis a nonlinear dimension.
The particular illustration hosts a UMAP visualization in 2D.
</p>
<br>
</div>
')
##############################D.E.A.############################################
# Intro banner for the MARKERS' IDENTIFICATION help tab.
# A single centered, bordered <h4> explaining that the tab finds marker genes
# (scRNA-seq and scATAC-seq) and marker peaks (scATAC-seq). The apostrophe in
# the tab name is backslash-escaped because the literal is single-quoted.
dea_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">
The <b>MARKERS\' IDENTIFICATION</b> tab enables the user to identify marker genes (scRNA-seq and scATAC-seq) and marker peaks (scATAC-seq) by
applying differential expression and differential accessibility analysis techniques respectively. This is a very crucial analysis step
in single-cell analysis, since the respective results may guide procedures like cell type/state annotation, and identification of key transcriptional
and regulatory programs that drive pathogenicity and/or development.
</h4>')
# Help text for scRNA-seq marker gene identification (DEA).
# Five full-width panels:
#   * input parameters: available tests and DEA thresholds (Figure 25)
#   * results table with its column layout (Figure 26)
#   * heatmap of the top-10 markers per cluster
#   * dotplot of the top-10 markers per cluster (Figure 27)
#   * per-cluster volcano plot (Figure 28)
# Every section heading is closed as <h3><u>...</u></h3>; an unclosed heading
# would make the following paragraphs and captions render inside the <h3>.
# NOTE(review): the heatmap caption reuses "Figure 26", the same number as the
# results table. It is left untouched because renumbering would shift every
# later figure on the help page.
dea_rna_input <- HTML('<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> Input parameters for marker genes identification </u></h3>
<br>
<img src = "images/help_page/DEA_RNA_parameters.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 25:</b> DEA parameters for RNA-seq data </figcaption>
<br>
<p>
<ol>
<li> Test used: The user is able to choose amongst an extensive list of statistical tests and DEA methods.
The analysis is performed in a cluster-specific manner, where each cell-cluster\'s cells are tested against
all the other cells of the dataset. Defaults to Wilcoxon rank sum test.
<ul>
<li> Wilcoxon rank sum test. Identifies DEGs between two cell clusters by using a Wilcoxon Rank Sum test.
<li> Likelihood-ratio test for single cell feature expression. Identifies DEGs between two cell clusters using a Likelihood-ratio test.
<li> Standard AUC classifier. Identifies DEGs between two cell clusters by using Receiver operating characteristic (ROC) analysis. Creates
an AUC classifier for each gene, and tests the ability of the particular classifier to separate two groups of cells. A value of 1 characterizes
the particular gene as a perfect classifier, implying upregulation. A value of 0 characterizes the particular gene as a perfect classifier,
implying downregulation. A value of 0.5 means that the gene has no predictive power.
<li> Student\'s t-test. Identifies DEGs between two cell clusters using the Student\'s t-test.
<li> MAST. Identifies DEGs between two cell clusters by using a hurdle model tailored to scRNA-seq data, by utilizing the MAST package.
<li> DESeq2. Identifies DEGs between two cell clusters based on a negative binomial distribution model, by utilizing the DESeq2 package. (slow operation)
</ul>
<li> DEA parameters
<ul>
<li> Base used for average logFC calculation. The user is able to choose what kind of log transformation will be applied to the average cell Fold Changes of
the DEA comparisons. Choices include (a) log with base e and (b) log with base 2. Defaults to log with base e.
<li> Minimum % of expression. Only test genes that are detected in a minimum fraction of cells in either of the two groups. Defaults to 0.25.
<li> Avg Log FC threshold. Limit testing to genes which show, on average, at least X-Fold change (logged) between the two groups of cells. Defaults to 0.25.
<li> P-value threshold. Only report genes with a p-value lower than the user-defined limit. Defaults to 0.01.
</ul>
</ol>
</p>
<br>
</div>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> DEA results </u></h3>
<p>
Marker genes data table. The particular, downloadable data table contains all the cluster-specific DEA results.
The information is stored as follows: <br>1st column: P value of the statistical test,
<br>2nd column: average log2 (or log(e)) Fold Change between the two groups of cells,
<br>3rd column: Percentage of cells of group1 that each gene is detected,
<br>4th column: Percentage of cells of group2 that each gene is detected,
<br>5th column: P adjusted value of the statistical test,
<br>6th column: cluster name, 7th column: gene name.
</p>
<img src = "images/help_page/DEA_RNA_results.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 26:</b> DEA results: table of marker genes </figcaption>
<br>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3><u>Heatmap visualization.</u></h3>
<p>
A heatmap visualization of scaled expression values for the top 10 markers of each cell cluster. X-axis represents genes,
while y-axis represents cells. Cells are sorted by their cluster name.
</p>
<br>
<img src = "images/help_page/DEA_RNA_results_heat.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 26:</b> DEA results: heatmap of top-10 marker genes per cluster </figcaption>
<br>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3><u>Dotplot visualization.</u></h3>
<p>
Dot plot illustration of average scaled gene expression for the top 10 markers of each cell cluster. X-axis represents cell clusters, while y-axis represents gene markers.
The size of each dot indicates the gene detection percentage in each cluster.
</p>
<br>
<img src = "images/help_page/DEA_RNA_results_dot1.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 27:</b> DEA results: Dotplot of top-10 marker genes per cluster </figcaption>
<br>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<p>
<h3><u>Volcano plot visualization.</u></h3>
<p>
Volcano plots are a very convenient way to visually summarize the DEA results between two compared cell groups. SCALA implementation allows cluster specific Volcano plots.
In the particular example, a volcano plot of differentially expressed genes of cluster 2 is generated. X-axis depicts the average log2 Fold Change of each gene, while the y-axis
the respective -log10 p-values. Hovering over the visualization results allows the user to detect genes of interest, like for example CD14, which is a known marker of the
particular cell-type (CD14-Monocytes).
</p>
<br>
<img src = "images/help_page/DEA_RNA_results_volcano.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 28:</b> DEA results: Volcano plot depicting the significant up and down regulated genes of the selected cluster </figcaption>
<br>
</div>
')
# Help text for the scRNA-seq marker visualisations.
# Three full-width panels:
#   * feature plots, in gene mode and gene-signature mode (Figures 29-31)
#   * violin plots (Figures 32-33)
#   * multi-feature (two-gene) plots (Figures 34-35)
# Every section heading is closed as <h3><u>...</u></h3>; an unclosed heading
# would make the following text and captions render inside the <h3>.
# NOTE(review): two captions carry "Figure 31" (signature creation and
# signature visualisation), and the text refers to "Fig. 31". The numbers are
# left untouched because renumbering would shift every later figure on the
# help page.
dea_rna_signature <- HTML('<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> Feature plot visualization </u></h3>
A cell scatter plot in 2D reduced dimensional space that summarizes the average gene expression of particular markers or gene signatures
<ul>
<li> <b>Gene selection mode.</b>
<br>Visualization of particular genes. The user is able to adjust several parameters like <br>(a) the gene name to be plotted, <br>(b) which 2D reduced
space to use for plotting (available choices are described in PCA/LSI and ADDITIONAL DIMENSIONALITY REDUCTION METHODS tabs), (c) showing or hiding the cluster names,
setting the <br>(d) maximum and the <br>(e) minimum expression value of the color scale. The plot will be generated after pressing the "Display Plot" button.
<br>
<br>
<img src = "images/help_page/DEA_RNA_results_feat_genewise.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 29:</b> Parameters for feature/signature plot visualization </figcaption>
<br>
In the particular feature plot, a classical Monocyte marker (CD14) marks cluster 2, guiding the annotation of the particular cell group.
<br>
<img src = "images/help_page/DEA_RNA_results_feat_gene1.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 30:</b> Feature plot visualization of CD14 gene </figcaption>
<br>
<li> <b>Gene signature mode</b>. <br>Visualization of sets of genes, as a combined gene expression signature. The user should initially define a gene signature name (Gene signature name),
as also the members of this signature (Paste a list of genes). <b>Note: Gene names should be separated by entering a new line.</b> In the particular example,
classical B-cell markers are defined. To calculate the signature score, the user should press the "Calculate signature score" button (Fig. 31). <br><br>
<img src = "images/help_page/DEA_RNA_results_feat_signature1.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 31:</b> Creation of B-cell markers specific signature </figcaption>
To visualize the Gene signature, the user should activate the "Gene signature" button under the first option of the particular tab. Consequently, the user
is able to choose his signature by using the "Select signature/numeric variable" sliding window. The rest of the options are described in the previous section.
<img src = "images/help_page/DEA_RNA_results_feat_signature_viz1.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 31:</b> Visualization of the signature </figcaption>
</ul>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Violin plot visualization </u></h3>
<p>
Violin plots of normalized gene expression for either individual genes ("Gene" button), or gene signatures ("Gene signature" button).
<br>
<img src = "images/help_page/DEA_RNA_results_violin1.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 32:</b> Violin plots parameters </figcaption>
<br>
The user can choose either a gene ("Search for gene" sliding window), or a predefined gene signature of interest ("Select signature/numeric variable" sliding window).
To generate the plot, the "Display plot" button should be pressed.
<img src = "images/help_page/DEA_RNA_results_violin2.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 33:</b> Violin plots visualization </figcaption>
</div>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> Multi-feature visualization </u></h3>
Average expression visualization of couples of genes. This functionality allows the detection of marker co-occurrence and co-expression across cells and cell clusters.
<br>
<img src = "images/help_page/DEA_RNA_results_multi.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 34:</b> Parameters for multi-feature plot visualization </figcaption>
<br>
<p>
The user is able to select a pair of genes to be plotted together ("Select 1st feature" and "Select 2nd feature", respectively),
while tweaking the color blending behavior is also applicable ("Select threshold for blending", defaults to 0.5). The rest of the
user-defined parameters are the same as in "Feature plot" tab (previous section).
</p>
<img src = "images/help_page/DEA_RNA_results_multi_viz_combined.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 35:</b> Example of multi-feature plot visualization </figcaption>
<p>
In the particular example, CD3D, a classical T-cell marker, and GZMB, a classical Natural Killer (NK) cell marker are visualized using the multi-feature functionality.
The first two Feature plots (first row) show the individual activity of each marker, the 3rd plot (lower-left) shows the co-embedding/occurrence of the two markers, which is
typical for T-cells and NK cells, while the fourth plot (lower-right) depicts the color scale values and mixed colors references.
</p>
</div>
')
# Help text for scATAC-seq marker gene detection (gene activity scores).
# One outer full-width panel describing the analysis options (Figure 36) that
# wraps two nested panels: the results table with its column layout and the
# top-10 marker heatmap.
# Every section heading is closed as <h3><u>...</u></h3>; an unclosed heading
# would make the following text and captions render inside the <h3>.
# NOTE(review): both the results table and the heatmap are captioned
# "Figure 37". The numbers are left untouched because renumbering would shift
# every later figure on the help page.
dea_atac_genes <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> Differential Accessibility Analysis options (scATAC-seq) </u></h3>
<br>
<img src = "images/help_page/ATAC_markers_menu_genes.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 36:</b> DAA parameters for ATAC-seq data </figcaption>
<br>
<p>
<u>Marker genes</u><br>
Marker gene detection using gene activity scores. The particular gene activity values are computed by aggregating the
accessibility signal along the regulatory space of each gene, and are considered very good predictors of gene expression. Based on this assumption,
differential expression analysis is performed as described above.
<br>
<br>
The user is able to select among 3 statistical methods for differential expression testing:
<ul>
<li> Wilcoxon. Identifies DEGs between two cell clusters by using a Wilcoxon Rank Sum test.
<li> Binomial. Identifies DEGs between two cell clusters using a binomial statistical test.
<li> T-test. Identifies DEGs between two cell clusters using the Student\'s t-test.
</ul>
</p>
<p>
Additionally, <b>ONLY WHEN</b> integration has been performed successfully from the tab "CLUSTERS\' ANNOTATION", the user can select "Integration predicted clusters" to utilize
the transferred cluster labels (instead of the original clusters).
</p>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> DAA - marker genes results </u></h3>
<p>
After the detection of cluster-specific marker genes, a downloadable data matrix depicting the DEA results is produced.
The 1st column reports the chromosome location of each marker, the 2nd and 3rd columns report the start and end chromosomal
positions of each reported gene respectively, the 4th column reports the gene strand, the 5th column reports the gene name,
the 6th column reports the gene index, the 7th column reports the average log2 Fold Change, the 8th column reports the False
Discovery Rate of the applied statistical test, the 9th column reports the mean difference between the average normalized gene
activity scores of the two compared groups, and the 10th column reports the marker cluster name.
</p>
<img src = "images/help_page/ATAC_marker_genes_results_table.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 37:</b> DAA results: table of marker genes </figcaption>
<br>
</div>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Heatmap visualization </u></h3>
<p>
The DEA results are visually summarized using a heatmap of average normalized gene activity scores of the top 10 gene markers of each cluster.
Both rows (clusters) and columns (genes) are clustered using a binary sorting procedure.
</p>
<img src = "images/help_page/ATAC_marker_genes_results_heatmap.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 37:</b> Heatmap of top-10 marker genes per cluster </figcaption>
</div>
</div>
')
# Help text for scATAC-seq marker peak detection.
# One outer full-width panel describing the analysis options (Figure 38) and
# the expected BED input (Figure 39) that wraps two nested panels: the results
# table with its column layout (Figure 40) and the top-10 marker peak heatmap
# (Figure 41).
# Every section heading is closed as <h3><u>...</u></h3>; an unclosed heading
# would make the following text and captions render inside the <h3>.
# The heatmap description names its columns as peaks, since this panel shows
# marker peaks rather than marker genes.
dea_atac_peaks <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> Differential Peak Accessibility Analysis options (scATAC-seq) </u></h3>
<br>
<img src = "images/help_page/ATAC_markers_menu_peaks.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 38:</b> DAA parameters for ATAC-seq data </figcaption>
<br>
<p>
<u>Marker Peaks</u><br>
Marker peak detection using summarized chromatin accessibility counts in user-defined peak sets.
Marker peaks are calculated in the same fashion as described in marker gene detection in the section above. The main difference is that the
aggregation of the accessibility counts is applied in user defined peaks. Examples of such genomic regions are
<br>(a) the peaks generated by cellranger-atac count ("peaks.bed" file, in the outs/ directory of cellranger-atac count run),
<br>(b) collections of regulatory regions such as promoters and enhancers, derived by ATAC-seq, DNase-seq, FAIRE-seq, MNase-seq
and ChIP-seq datasets, or
<br>(c) fixed genomic segments called bins/tiles.
<br>
The particular input file should be stored in "bed" format, including the chromosome name, and the start and end genomic positions for each of the stored peaks in the first 3 columns.
<div class="col-md-12 scrollable">
<br>
<img src = "images/help_page/peak_file_head.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 39:</b> BED example file </figcaption>
<br>
</div>
<p>
Additionally, <b>ONLY WHEN</b> integration has been performed successfully from the tab "CLUSTERS\' ANNOTATION", the user can select "Integration predicted clusters" to utilize
the transferred cluster labels (instead of the original clusters).
</p>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u> DAA - marker peaks results </u></h3>
<p>
After the detection of cluster-specific marker peaks, a downloadable data matrix depicting the differential accessibility analysis results is produced.
The 1st column reports the chromosome location of each marker, the 2nd column reports the peak index, the 3rd and 4th columns report the start and end
chromosomal positions of each reported peak respectively, the 5th column reports the average log2 Fold Change, the 6th column reports the False Discovery
Rate of the applied statistical test, the 7th column reports the mean difference between the average normalized chromatin accessibility values of the two
compared groups, and the 8th column reports the marker cluster name.
</p>
<img src = "images/help_page/ATAC_marker_peaks_results_table.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 40:</b> DAA results: table of marker peaks </figcaption>
<br>
</div>
<br>
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<h3><u> Heatmap visualization </u></h3>
<p>
The differential accessibility analysis results are visually summarized using a heatmap of average normalized peak accessibility of the top 10 marker peaks of each cluster.
Both rows (clusters) and columns (peaks) are clustered using a binary sorting procedure.
</p>
<img src = "images/help_page/ATAC_marker_peaks_results_heatmap.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 41:</b> Heatmap of top-10 marker peaks per cluster </figcaption>
<br>
</div>
</div>
')
# Help text for scATAC-seq gene activity feature plots.
# One full-width panel with a heading, an example screenshot (Figure 42) and a
# paragraph describing the gene selector, the plot-type selector (text states
# it defaults to UMAP) and the "Display plot" button, using CD14 as example.
dea_atac_activity <- HTML('
<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<h3><u>Feature plots of gene activity scores</u></h3>
<img src = "images/help_page/CD14.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 42:</b> Feature plot depicting gene activity scores per cell for a selected gene. </figcaption>
<br>
<p>
The user is able to determine the gene of interest ("Select a gene" sliding window), and in which reduced dimensional space the visualization will take place
("Plot type" sliding window, defaults to UMAP). To generate the visualization, a respective "Display plot" button is available.
In the particular example, CD14 (a classical Monocyte marker) gene activity scores are plotted, revealing that C2 cluster could be annotated as Monocytes.
</p>
</div>
')
##############################Cell cycle########################################
# Intro banner for the CELL CYCLE PHASE ANALYSIS help tab.
# A single centered, bordered <h4> explaining that the tab scores cell cycle
# phases from canonical markers to reveal cell-cycle-driven grouping in a
# scRNA-seq dataset.
cellCycle_tab_intro <- HTML('<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">
The <b>CELL CYCLE PHASE ANALYSIS</b> tab enables the user to identify effects of cell cycle heterogeneity in
a scRNA-seq dataset, by calculating cell cycle phase scores based on canonical markers. If the analysis shows
a clear cell grouping based on these markers, then the particular effect should be considered a putative unwanted bias.
</h4>')
# Help text for the scRNA-seq cell cycle phase analysis.
# One full-width panel with two screenshots:
#   * Figure 43 - cells in PCA space coloured by cell cycle phase
#   * Figure 44 - per-cluster bar plots of the cell-cycle phase percentages
# plus short paragraphs on the "Plot type" selector and how to read the PCA
# scatter plot.
cell_cycle_rna <- HTML('<div class="col-md-12 scrollable" style="background-color: #ffffff; border: 1px solid #222d32; border-radius: 15px; font-size:16px;">
<br>
<p>
The user can choose in which reduced space the cell cycle phase scores will be visualized ("Plot type" sliding window, defaults to PCA).
</p>
<img src = "images/help_page/CellCycleRNA.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 43:</b> Cells visualized in PCA space and colored by cell phase identity </figcaption>
<br>
<p>
PCA scatter plot showing a reasonable mix of cell-cycle phases along the first two principal components.
</p>
<br>
<img src = "images/help_page/CellCycleRNA_barplot.PNG" style="border: 1px solid #222d32; border-radius: 15px;">
<figcaption style = "font-size:14px" class="figure-caption text-left"><b>Figure 44:</b> Per-cluster bar plots showing the percentages of the 3 basic cell-cycle phases. </figcaption>
<br>
</div>
')
################################Doublets########################################
# Intro banner for the DOUBLETS' DETECTION ANALYSIS help tab.
# A single centered, bordered <h4>; the sentence opens with "The <b>...</b>
# tab", the same wording used by the other tab intro banners in this file.
doublets_tab_intro <- HTML('
<h4 style = "line-height: 1.5; text-align:center; background-color: #ffffff; border: 1px solid #222d32; border-radius: 5px;">
The <b>DOUBLETS\' DETECTION ANALYSIS</b> tab facilitates identification of potential doublets and their removal from downstream analysis.
</h4>
')
doublets_tab_rna <- HTML('<p>
For doublet calculation in scRNA-seq datasets SCALA employs DoubletFinder package. This package can recognize
potential homotypic and heterotypic doublets. Additionally, the end user can utilize the results produced from