-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinit.R
247 lines (143 loc) · 9.74 KB
/
init.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
### This script runs all analyses associated with the discovery of gene
### expression markers in two acute lymphoblastic leukemia subtypes. The
### analyzed data come from the TARGET-ALL-P2 gene expression project in the
### TARGET database.
# Start from an empty workspace -------------------------------------------
# NOTE(review): this removes every visible object in the environment the
# script is evaluated in, so sourcing it from an interactive session discards
# whatever the user had defined there.
rm(list = ls())

# Fetch project files from OSF --------------------------------------------
# All files listed under OSF node "kgfpv" are downloaded, descending into
# sub-folders; files that conflict with an existing local copy are skipped.
library(osfr)
node <- osf_retrieve_node("kgfpv")
files <- osf_ls_files(node, n_max = Inf)
osf_download(files, recurse = TRUE, conflicts = "skip")

# Rebuild the package library from the renv lockfile ----------------------
# The library under env_ALL/ is registered as an external library before the
# restore. renv is detached again afterwards so it does not stay attached
# while the analysis scripts run.
library("renv")
renv::settings$external.libraries("env_ALL/lib/R/library/")
renv::restore()
renv::activate()
detach("package:renv", unload = TRUE)

# Use Cairo for bitmap rendering, for headless machines -------------------
options(bitmapType = "cairo")
## Run scripts ------------------------------------------------------------
# Each analysis step is executed with source(), one script after another.
# All script paths are relative, so the working directory must be the
# directory that contains the TARGET_* folders.
# NOTE(review): later scripts presumably rely on outputs of earlier ones, so
# the order should be kept -- confirm before reordering.
## GET DATA ---------------------------------------------------------------
# Get TARGET-ALL-P2 gene expression data, investigate metadata and subset data
# to contain only bone marrow samples
source("TARGET_data/get_TARGET_data.R")
# -------------------------------------------------------------------------
## REPLICATES -------------------------------------------------------------
# Investigate replicate samples in the data
source("TARGET_replicates/TARGET_ALL_P2_bmp_replicates.R")
# Adjust for replicates in the data
source("TARGET_replicates/TARGET_ALL_P2_bmp_replicates_adj.R")
# -------------------------------------------------------------------------
## PROCESSING -------------------------------------------------------------
# Process data in three stages: remove outliers (preprocessing), normalize for
# GC content and library size (normalization) and remove lowly expressed genes
# (filtering)
source("TARGET_processing/bone_marrow_primary_comp/TARGET_process_bmp_comp.R")
# Investigate the genes lost in the normalization step
source("TARGET_processing/bone_marrow_primary_comp/TARGET_norm_genes_bmp_comp.R")
# Visualize the gene types of the genes lost in the normalization step
source("TARGET_processing/bone_marrow_primary_comp/TARGET_norm_genes_plot_bmp_comp.R")
# -------------------------------------------------------------------------
## VOOM TRANSFORMATION ----------------------------------------------------
# Voom transform the raw data
source("TARGET_transform/voom_transform_bmp_raw/TARGET_ALL_P2_bmp_raw_voom.R")
# Voom transform the filtered data
source("TARGET_transform/voom_transform_bmp_comp/TARGET_ALL_P2_bmp_voom_comp.R")
# -------------------------------------------------------------------------
## MULTIDIMENSIONAL SCALING ANALYSIS BEFORE BATCH CORRECTION --------------
# NOTE(review): the scripts in the MDS sections live under TARGET_pca/ and are
# named *_pca*, while the comments describe MDS -- confirm which method the
# scripts actually apply.
# Extract information about the samples, such as subtype and metadata
source("TARGET_pca/bone_marrow_primary_pca_comp/TARGET_ALL_P2_bmp_info_comp.R")
# Perform MDS on raw gene expression data
source("TARGET_pca/bone_marrow_primary_pca_raw/TARGET_ALL_P2_bmp_raw_pca.R")
# Perform MDS on processed data before batch correction
source("TARGET_pca/bone_marrow_primary_pca_comp/init_pca/TARGET_ALL_P2_bmp_pca_comp.R")
# -------------------------------------------------------------------------
## BATCH CORRECTION -------------------------------------------------------
# Apply batch correction
source("TARGET_batch/bone_marrow_primary_batch_comp/TARGET_ALL_P2_bmp_batch_comp.R")
# -------------------------------------------------------------------------
## MULTIDIMENSIONAL SCALING ANALYSIS AFTER BATCH CORRECTION ---------------
# Perform MDS on batch corrected data
source("TARGET_pca/bone_marrow_primary_pca_comp/batch_corr_pca/TARGET_ALL_P2_bmp_pca_batch_comp.R")
# -------------------------------------------------------------------------
## DIFFERENTIAL EXPRESSION ANALYSIS ---------------------------------------
# Differential expression analysis (DEA) is run with three methods (DESeq2,
# edgeR and limma-voom); the remaining scripts in this section visualize,
# compare and intersect their results.
# Perform DEA using DESeq2 method
source("TARGET_dea/bone_marrow_primary_dea_comp/deseq2/TARGET_ALL_P2_bmp_dea_deseq2.R")
# Perform DEA using edgeR method
source("TARGET_dea/bone_marrow_primary_dea_comp/edgeR/TARGET_ALL_P2_bmp_dea_edgeR.R")
# Perform DEA using limma-voom method
source("TARGET_dea/bone_marrow_primary_dea_comp/limma_voom/TARGET_ALL_P2_bmp_dea_limma.R")
# Make visualizations of gene counts for the three methods
source("TARGET_dea/bone_marrow_primary_dea_comp/gene_counts_DEA/TARGET_ALL_P2_bmp_dea_gene_counts.R")
# Make visualizations of DEA results found using the three methods
source("TARGET_dea/bone_marrow_primary_dea_comp/visualization_DEA/TARGET_ALL_P2_bmp_dea_visualization.R")
# Compare the DEA results found using the three methods
source("TARGET_dea/bone_marrow_primary_dea_comp/compare_DEA/TARGET_ALL_P2_bmp_dea_compare.R")
# Intersect the identified differentially expressed genes (DEGs) across the
# three methods
source("TARGET_dea/bone_marrow_primary_dea_comp/compare_DEA/gene_intersection_lists/TARGET_ALL_P2_bmp_dea_gene_intersection_lists.R")
# -------------------------------------------------------------------------
## HOUSEKEEPING GENES -----------------------------------------------------
# Find overlap between identified DEGs and housekeeping genes
source("TARGET_housekeeping/bone_marrow_primary_housekeeping_comp/TARGET_ALL_P2_bmp_housekeeping.R")
# Visualize identified housekeeping DEGs
source("TARGET_housekeeping/bone_marrow_primary_housekeeping_comp/TARGET_ALL_P2_bmp_housekeeping_visualization.R")
# -------------------------------------------------------------------------
## ENRICHMENT ANALYSIS ----------------------------------------------------
# Perform enrichment analysis of consensus DEGs
source("TARGET_enrichment/bone_marrow_primary_enrichment_comp/consensus_DEG/TARGET_ALL_P2_bmp_DEG_enrichment.R")
# -------------------------------------------------------------------------
## REGULARIZED LOGISTIC REGRESSION ----------------------------------------
# Perform regularized logistic regression on the gene expression data
source("TARGET_lasso/bone_marrow_primary_lasso_comp/TARGET_ALL_P2_bmp_lasso.R")
# Visualize the genes found by the logistic regression
# NOTE(review): the script path suggests this plots biotypes of elastic-net
# consensus genes -- confirm against the script.
source("TARGET_lasso/bone_marrow_primary_lasso_comp/elastic_consensus_biotypes/TARGET_ALL_P2_bmp_consensus_biotypes.R")
# -------------------------------------------------------------------------
## GENE CONTRIBUTIONS FROM PCA --------------------------------------------
# Perform PCA on gene expression data to find feature (gene) contributions to
# principal components
source("TARGET_pca/bone_marrow_primary_pca_comp/gene_contributions_pca/TARGET_ALL_P2_bmp_pca_gene_contrib.R")
# Visualize feature (gene) contributions to principal components
source("TARGET_pca/bone_marrow_primary_pca_comp/gene_contributions_pca/gene_contributions_pca_biotypes/TARGET_ALL_P2_bmp_pca_gene_contrib_biotypes.R")
# -------------------------------------------------------------------------
## COMPARE RESULTS ACROSS METHODS -----------------------------------------
# Compare the genes identified by the different methods
source("TARGET_compare_genes/bone_marrow_primary_compare_comp/TARGET_ALL_P2_bmp_compare.R")
# -------------------------------------------------------------------------
## UNSUPERVISED CLUSTERING ------------------------------------------------
# Perform unsupervised clustering using the cola framework
source("TARGET_clustering/bone_marrow_primary_clustering_comp/TARGET_ALL_P2_bmp_run_cola.R")
# Analyze and visualize results from unsupervised clustering
source("TARGET_clustering/bone_marrow_primary_clustering_comp/TARGET_ALL_P2_bmp_analyze_cola.R")
# -------------------------------------------------------------------------
## RANDOM FOREST ON PREDICTED CLUSTERS ------------------------------------
# Perform variable selection using random forest on the predicted clusters
source("TARGET_random_forest/bone_marrow_primary_random_forest_comp/TARGET_ALL_P2_bmp_clusters_random_forest.R")
# Analyze the variables selected by the random forest
source("TARGET_random_forest/bone_marrow_primary_random_forest_comp/TARGET_ALL_P2_bmp_analyze_clusters_random_forest.R")
# -------------------------------------------------------------------------
## SURVIVAL ANALYSIS ------------------------------------------------------
# Perform survival analysis on predicted subtype-related gene expression markers
source("TARGET_survival/bone_marrow_primary_survival_comp/TARGET_ALL_P2_bmp_survival_subtype_markers.R")
# Perform survival analysis on predicted cluster-related gene expression markers
source("TARGET_survival/bone_marrow_primary_survival_comp/TARGET_ALL_P2_bmp_survival_cluster_markers.R")
# -------------------------------------------------------------------------
## DRUG TARGET INVESTIGATION ----------------------------------------------
# Perform drug target investigation of gene expression markers
source("TARGET_drug_targets/bone_marrow_primary_drug_targets_comp/TARGET_ALL_P2_bmp_drug_targets_markers.R")
# -------------------------------------------------------------------------
## COMPARISON WITH KNOWN MARKERS ------------------------------------------
# Compare expression of predicted markers with known markers
source("TARGET_known_markers/bone_marrow_primary_known_markers_comp/TARGET_ALL_P2_bmp_known_markers.R")
# -------------------------------------------------------------------------
## TARGET BLOOD VALIDATION ------------------------------------------------
# Perform clustering of expression of predicted markers in TARGET blood samples
source("TARGET_blood_validation/TARGET_blood_validation.R")
# -------------------------------------------------------------------------
## GTEx VALIDATION --------------------------------------------------------
# Perform clustering of expression of predicted markers in GTEx blood and
# bone marrow samples
source("GTEx_validation/GTEx_validation.R")
# -------------------------------------------------------------------------