From bca9821f7130de6d38c097dbb4de32222dd373c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sophia=20M=C3=BCller-Dott?= Date: Tue, 16 Jul 2024 08:50:12 +0200 Subject: [PATCH] updated tumor-based benchmark --- .../CPTAC_GS_set_generation.Rmd | 3551 +++++++++++++++++ .../scripts/tumor_based_benchmark/outlieR.R | 84 + .../tumor_based_benchmark/outlieR_plus.R | 138 + .../tumor_based_benchmark/plot_outlieR.R | 51 + 4 files changed, 3824 insertions(+) create mode 100644 workflow/scripts/tumor_based_benchmark/CPTAC_GS_set_generation.Rmd create mode 100644 workflow/scripts/tumor_based_benchmark/outlieR.R create mode 100644 workflow/scripts/tumor_based_benchmark/outlieR_plus.R create mode 100644 workflow/scripts/tumor_based_benchmark/plot_outlieR.R diff --git a/workflow/scripts/tumor_based_benchmark/CPTAC_GS_set_generation.Rmd b/workflow/scripts/tumor_based_benchmark/CPTAC_GS_set_generation.Rmd new file mode 100644 index 0000000..ca12955 --- /dev/null +++ b/workflow/scripts/tumor_based_benchmark/CPTAC_GS_set_generation.Rmd @@ -0,0 +1,3551 @@ +--- +title: "R Notebook" +output: html_notebook +--- + +This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. + +Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. + +```{r} +plot(cars) +``` + +Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Cmd+Option+I*. + +When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Cmd+Shift+K* to preview the HTML file). + +The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed. 
+ + +use pan-cancer data to benchmark kinase activity inference methods >> use samples that have highest and lowest levels of kinase protein or activation sites on kinases as gold standard (GS) positives for highly activated kinase-sample pairs and GS negatives for minimally activated kinase-sample pairs, respectively. + +load outlieR functions and required libraries +```{r} +source("outlieR.R") +source("outlieR_plus.R") +#source("plot_outlieR.R") +library(tidyverse) +library(vioplot) +library(colorspace) +library(ggridges) +``` + +load pancan protein and phosphosite datasets and identify activating sites on kinases with measurements (each cancer type separately) + +load kinase and kinase activating site lists +```{r} +kins_mapped <- read.table("genelist_human_kinase.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t", quote = "") +rownames(kins_mapped) <- kins_mapped$gene +kinase_reg_sites_act <- read.table("kinases_active_sites_updated_with_PSP_Mar2022.txt", sep = "\t", stringsAsFactors = FALSE, quote = "", comment.char = "", header = TRUE) +reg_site_kins <- unique(kinase_reg_sites_act$HGNC_Symbol) +kins_mapped_act1 <- kins_mapped[kins_mapped$gene_name %in% reg_site_kins, ] +``` + +I. 
construct protein-based benchmarks: + +load protein data, filter to kinases measured in at least 30 samples with variance >= 0.1, and use the outlieR_plus function to identify "outlier" samples (top and bottom 5% relative to normal distribution for each kinase; z_thresh = 1.645) +```{r} +brca_prot <- read.table("data_refresh/BRCA/BRCA_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(brca_prot) <- brca_prot$idx +brca_prot_kins <- brca_prot[rownames(brca_prot) %in% kins_mapped$gene, ] +rownames(brca_prot_kins) <- kins_mapped[rownames(brca_prot_kins), "gene_name"] +brca_prot_kins <- brca_prot_kins[rowSums(!is.na(brca_prot_kins[ , -1])) > 29, ] +hist(apply(brca_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +brca_prot_kins <- brca_prot_kins[apply(brca_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +brca_prot_outliers_5perc <- outlieR_plus(brca_prot_kins[ , -1], testing_samples = colnames(brca_prot_kins)[-1], reference_samples = colnames(brca_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") +#colnames(brca_prot_outliers_5perc$Zscores) +#all.equal(as.data.frame(t(scale(t(brca_prot_kins[ , -1])))), brca_prot_outliers_5perc$Zscores[rownames(brca_prot_kins), colnames(brca_prot_kins)[-1]]) + +ccrcc_prot <- read.table("data_refresh/CCRCC/CCRCC_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(ccrcc_prot) <- ccrcc_prot$idx +ccrcc_prot_kins <- ccrcc_prot[rownames(ccrcc_prot) %in% kins_mapped$gene, ] +rownames(ccrcc_prot_kins) <- kins_mapped[rownames(ccrcc_prot_kins), "gene_name"] +ccrcc_prot_kins <- ccrcc_prot_kins[rowSums(!is.na(ccrcc_prot_kins[ , -1])) > 29, ] +hist(apply(ccrcc_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +ccrcc_prot_kins <- ccrcc_prot_kins[apply(ccrcc_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +ccrcc_prot_outliers_5perc <- outlieR_plus(ccrcc_prot_kins[ , -1], testing_samples = colnames(ccrcc_prot_kins)[-1], reference_samples = 
colnames(ccrcc_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") + +gbm_prot <- read.table("data_refresh/GBM/GBM_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(gbm_prot) <- gbm_prot$idx +gbm_prot_kins <- gbm_prot[rownames(gbm_prot) %in% kins_mapped$gene, ] +rownames(gbm_prot_kins) <- kins_mapped[rownames(gbm_prot_kins), "gene_name"] +gbm_prot_kins <- gbm_prot_kins[rowSums(!is.na(gbm_prot_kins[ , -1])) > 29, ] +hist(apply(gbm_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +gbm_prot_kins <- gbm_prot_kins[apply(gbm_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +gbm_prot_outliers_5perc <- outlieR_plus(gbm_prot_kins[ , -1], testing_samples = colnames(gbm_prot_kins)[-1], reference_samples = colnames(gbm_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") + +hnscc_prot <- read.table("data_refresh/HNSCC/HNSCC_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(hnscc_prot) <- hnscc_prot$idx +hnscc_prot_kins <- hnscc_prot[rownames(hnscc_prot) %in% kins_mapped$gene, ] +rownames(hnscc_prot_kins) <- kins_mapped[rownames(hnscc_prot_kins), "gene_name"] +hnscc_prot_kins <- hnscc_prot_kins[rowSums(!is.na(hnscc_prot_kins[ , -1])) > 29, ] +hist(apply(hnscc_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +hnscc_prot_kins <- hnscc_prot_kins[apply(hnscc_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +hnscc_prot_outliers_5perc <- outlieR_plus(hnscc_prot_kins[ , -1], testing_samples = colnames(hnscc_prot_kins)[-1], reference_samples = colnames(hnscc_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") + +lscc_prot <- read.table("data_refresh/LSCC/LSCC_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(lscc_prot) <- lscc_prot$idx +lscc_prot_kins <- lscc_prot[rownames(lscc_prot) %in% kins_mapped$gene, ] +rownames(lscc_prot_kins) 
<- kins_mapped[rownames(lscc_prot_kins), "gene_name"] +lscc_prot_kins <- lscc_prot_kins[rowSums(!is.na(lscc_prot_kins[ , -1])) > 29, ] +hist(apply(lscc_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +lscc_prot_kins <- lscc_prot_kins[apply(lscc_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +lscc_prot_outliers_5perc <- outlieR_plus(lscc_prot_kins[ , -1], testing_samples = colnames(lscc_prot_kins)[-1], reference_samples = colnames(lscc_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") + +luad_prot <- read.table("data_refresh/LUAD/LUAD_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(luad_prot) <- luad_prot$idx +luad_prot_kins <- luad_prot[rownames(luad_prot) %in% kins_mapped$gene, ] +rownames(luad_prot_kins) <- kins_mapped[rownames(luad_prot_kins), "gene_name"] +luad_prot_kins <- luad_prot_kins[rowSums(!is.na(luad_prot_kins[ , -1])) > 29, ] +hist(apply(luad_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +luad_prot_kins <- luad_prot_kins[apply(luad_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +luad_prot_outliers_5perc <- outlieR_plus(luad_prot_kins[ , -1], testing_samples = colnames(luad_prot_kins)[-1], reference_samples = colnames(luad_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") + +ucec_prot <- read.table("data_refresh/UCEC/UCEC_proteomics_gene_abundance_log2_reference_intensity_normalized_Tumor.txt", stringsAsFactors = FALSE, header = TRUE, sep = "\t") +rownames(ucec_prot) <- ucec_prot$idx +ucec_prot_kins <- ucec_prot[rownames(ucec_prot) %in% kins_mapped$gene, ] +rownames(ucec_prot_kins) <- kins_mapped[rownames(ucec_prot_kins), "gene_name"] +ucec_prot_kins <- ucec_prot_kins[rowSums(!is.na(ucec_prot_kins[ , -1])) > 29, ] +hist(apply(ucec_prot_kins[ , -1], 1, var, na.rm = TRUE), breaks = 50, xlim = c(0,0.5)) +ucec_prot_kins <- ucec_prot_kins[apply(ucec_prot_kins[ , -1], 1, var, na.rm = TRUE) >= 0.1, ] +ucec_prot_outliers_5perc <- outlieR_plus(ucec_prot_kins[ , -1], testing_samples 
= colnames(ucec_prot_kins)[-1], reference_samples = colnames(ucec_prot_kins)[-1], z_thresh = 1.645, z_method = "normal") +``` + +also include top and bottom 2.5% as alternate thresholds for identifying samples with high and low kinase protein levels + +```{r} +brca_prot_outliers_2pt5perc <- outlieR_plus(brca_prot_kins[ , -1], testing_samples = colnames(brca_prot_kins)[-1], reference_samples = colnames(brca_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +ccrcc_prot_outliers_2pt5perc <- outlieR_plus(ccrcc_prot_kins[ , -1], testing_samples = colnames(ccrcc_prot_kins)[-1], reference_samples = colnames(ccrcc_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +gbm_prot_outliers_2pt5perc <- outlieR_plus(gbm_prot_kins[ , -1], testing_samples = colnames(gbm_prot_kins)[-1], reference_samples = colnames(gbm_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +hnscc_prot_outliers_2pt5perc <- outlieR_plus(hnscc_prot_kins[ , -1], testing_samples = colnames(hnscc_prot_kins)[-1], reference_samples = colnames(hnscc_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +lscc_prot_outliers_2pt5perc <- outlieR_plus(lscc_prot_kins[ , -1], testing_samples = colnames(lscc_prot_kins)[-1], reference_samples = colnames(lscc_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +luad_prot_outliers_2pt5perc <- outlieR_plus(luad_prot_kins[ , -1], testing_samples = colnames(luad_prot_kins)[-1], reference_samples = colnames(luad_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +ucec_prot_outliers_2pt5perc <- outlieR_plus(ucec_prot_kins[ , -1], testing_samples = colnames(ucec_prot_kins)[-1], reference_samples = colnames(ucec_prot_kins)[-1], z_thresh = 1.96, z_method = "normal") +``` + +also try top and bottom 10% as thresholds for identifying samples with high and low kinase protein levels +```{r} +brca_prot_outliers_10perc <- outlieR_plus(brca_prot_kins[ , -1], testing_samples = colnames(brca_prot_kins)[-1], reference_samples = colnames(brca_prot_kins)[-1], z_thresh = 1.282, 
z_method = "normal") +ccrcc_prot_outliers_10perc <- outlieR_plus(ccrcc_prot_kins[ , -1], testing_samples = colnames(ccrcc_prot_kins)[-1], reference_samples = colnames(ccrcc_prot_kins)[-1], z_thresh = 1.282, z_method = "normal") +gbm_prot_outliers_10perc <- outlieR_plus(gbm_prot_kins[ , -1], testing_samples = colnames(gbm_prot_kins)[-1], reference_samples = colnames(gbm_prot_kins)[-1], z_thresh = 1.282, z_method = "normal") +hnscc_prot_outliers_10perc <- outlieR_plus(hnscc_prot_kins[ , -1], testing_samples = colnames(hnscc_prot_kins)[-1], reference_samples = colnames(hnscc_prot_kins)[-1], z_thresh = 1.282, z_method = "normal") +lscc_prot_outliers_10perc <- outlieR_plus(lscc_prot_kins[ , -1], testing_samples = colnames(lscc_prot_kins)[-1], reference_samples = colnames(lscc_prot_kins)[-1], z_thresh = 1.282, z_method = "normal") +luad_prot_outliers_10perc <- outlieR_plus(luad_prot_kins[ , -1], testing_samples = colnames(luad_prot_kins)[-1], reference_samples = colnames(luad_prot_kins)[-1], z_thresh = 1.282, z_method = "normal") +ucec_prot_outliers_10perc <- outlieR_plus(ucec_prot_kins[ , -1], testing_samples = colnames(ucec_prot_kins)[-1], reference_samples = colnames(ucec_prot_kins)[-1], z_thresh = 1.282, z_method = "normal") +``` + +also try top and bottom 15% as thresholds for identifying samples with high and low kinase protein levels +```{r} +brca_prot_outliers_15perc <- outlieR_plus(brca_prot_kins[ , -1], testing_samples = colnames(brca_prot_kins)[-1], reference_samples = colnames(brca_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") +ccrcc_prot_outliers_15perc <- outlieR_plus(ccrcc_prot_kins[ , -1], testing_samples = colnames(ccrcc_prot_kins)[-1], reference_samples = colnames(ccrcc_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") +gbm_prot_outliers_15perc <- outlieR_plus(gbm_prot_kins[ , -1], testing_samples = colnames(gbm_prot_kins)[-1], reference_samples = colnames(gbm_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") 
+hnscc_prot_outliers_15perc <- outlieR_plus(hnscc_prot_kins[ , -1], testing_samples = colnames(hnscc_prot_kins)[-1], reference_samples = colnames(hnscc_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") +lscc_prot_outliers_15perc <- outlieR_plus(lscc_prot_kins[ , -1], testing_samples = colnames(lscc_prot_kins)[-1], reference_samples = colnames(lscc_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") +luad_prot_outliers_15perc <- outlieR_plus(luad_prot_kins[ , -1], testing_samples = colnames(luad_prot_kins)[-1], reference_samples = colnames(luad_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") +ucec_prot_outliers_15perc <- outlieR_plus(ucec_prot_kins[ , -1], testing_samples = colnames(ucec_prot_kins)[-1], reference_samples = colnames(ucec_prot_kins)[-1], z_thresh = 1.036, z_method = "normal") +``` + +for proteins, also filter to kinases with both GS positive and GS negative samples (at least one of each) +```{r} +brca_prot_gsboth_kins <- rownames(brca_prot_outliers_5perc$outliers[(rowSums(brca_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(brca_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +brca_prot_gs_5perc <- brca_prot_outliers_5perc +for(i in 1:length(brca_prot_gsboth_kins)){ + brca_prot_gs_5perc$GS_pos_pairs[[brca_prot_gsboth_kins[i]]] <- colnames(brca_prot_gs_5perc$outliers)[(brca_prot_gs_5perc$outliers[brca_prot_gsboth_kins[i], ] == 1) & !is.na(brca_prot_gs_5perc$outliers[brca_prot_gsboth_kins[i], ])] + brca_prot_gs_5perc$GS_neg_pairs[[brca_prot_gsboth_kins[i]]] <- colnames(brca_prot_gs_5perc$outliers)[(brca_prot_gs_5perc$outliers[brca_prot_gsboth_kins[i], ] == -1) & !is.na(brca_prot_gs_5perc$outliers[brca_prot_gsboth_kins[i], ])] +} + +ccrcc_prot_gsboth_kins <- rownames(ccrcc_prot_outliers_5perc$outliers[(rowSums(ccrcc_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(ccrcc_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +ccrcc_prot_gs_5perc <- ccrcc_prot_outliers_5perc +for(i in 
1:length(ccrcc_prot_gsboth_kins)){ + ccrcc_prot_gs_5perc$GS_pos_pairs[[ccrcc_prot_gsboth_kins[i]]] <- colnames(ccrcc_prot_gs_5perc$outliers)[(ccrcc_prot_gs_5perc$outliers[ccrcc_prot_gsboth_kins[i], ] == 1) & !is.na(ccrcc_prot_gs_5perc$outliers[ccrcc_prot_gsboth_kins[i], ])] + ccrcc_prot_gs_5perc$GS_neg_pairs[[ccrcc_prot_gsboth_kins[i]]] <- colnames(ccrcc_prot_gs_5perc$outliers)[(ccrcc_prot_gs_5perc$outliers[ccrcc_prot_gsboth_kins[i], ] == -1) & !is.na(ccrcc_prot_gs_5perc$outliers[ccrcc_prot_gsboth_kins[i], ])] +} + +gbm_prot_gsboth_kins <- rownames(gbm_prot_outliers_5perc$outliers[(rowSums(gbm_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(gbm_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +gbm_prot_gs_5perc <- gbm_prot_outliers_5perc +for(i in 1:length(gbm_prot_gsboth_kins)){ + gbm_prot_gs_5perc$GS_pos_pairs[[gbm_prot_gsboth_kins[i]]] <- colnames(gbm_prot_gs_5perc$outliers)[(gbm_prot_gs_5perc$outliers[gbm_prot_gsboth_kins[i], ] == 1) & !is.na(gbm_prot_gs_5perc$outliers[gbm_prot_gsboth_kins[i], ])] + gbm_prot_gs_5perc$GS_neg_pairs[[gbm_prot_gsboth_kins[i]]] <- colnames(gbm_prot_gs_5perc$outliers)[(gbm_prot_gs_5perc$outliers[gbm_prot_gsboth_kins[i], ] == -1) & !is.na(gbm_prot_gs_5perc$outliers[gbm_prot_gsboth_kins[i], ])] +} + +hnscc_prot_gsboth_kins <- rownames(hnscc_prot_outliers_5perc$outliers[(rowSums(hnscc_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(hnscc_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +hnscc_prot_gs_5perc <- hnscc_prot_outliers_5perc +for(i in 1:length(hnscc_prot_gsboth_kins)){ + hnscc_prot_gs_5perc$GS_pos_pairs[[hnscc_prot_gsboth_kins[i]]] <- colnames(hnscc_prot_gs_5perc$outliers)[(hnscc_prot_gs_5perc$outliers[hnscc_prot_gsboth_kins[i], ] == 1) & !is.na(hnscc_prot_gs_5perc$outliers[hnscc_prot_gsboth_kins[i], ])] + hnscc_prot_gs_5perc$GS_neg_pairs[[hnscc_prot_gsboth_kins[i]]] <- colnames(hnscc_prot_gs_5perc$outliers)[(hnscc_prot_gs_5perc$outliers[hnscc_prot_gsboth_kins[i], ] == -1) & 
!is.na(hnscc_prot_gs_5perc$outliers[hnscc_prot_gsboth_kins[i], ])] +} + +lscc_prot_gsboth_kins <- rownames(lscc_prot_outliers_5perc$outliers[(rowSums(lscc_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(lscc_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +lscc_prot_gs_5perc <- lscc_prot_outliers_5perc +for(i in 1:length(lscc_prot_gsboth_kins)){ + lscc_prot_gs_5perc$GS_pos_pairs[[lscc_prot_gsboth_kins[i]]] <- colnames(lscc_prot_gs_5perc$outliers)[(lscc_prot_gs_5perc$outliers[lscc_prot_gsboth_kins[i], ] == 1) & !is.na(lscc_prot_gs_5perc$outliers[lscc_prot_gsboth_kins[i], ])] + lscc_prot_gs_5perc$GS_neg_pairs[[lscc_prot_gsboth_kins[i]]] <- colnames(lscc_prot_gs_5perc$outliers)[(lscc_prot_gs_5perc$outliers[lscc_prot_gsboth_kins[i], ] == -1) & !is.na(lscc_prot_gs_5perc$outliers[lscc_prot_gsboth_kins[i], ])] +} + +luad_prot_gsboth_kins <- rownames(luad_prot_outliers_5perc$outliers[(rowSums(luad_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(luad_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +luad_prot_gs_5perc <- luad_prot_outliers_5perc +for(i in 1:length(luad_prot_gsboth_kins)){ + luad_prot_gs_5perc$GS_pos_pairs[[luad_prot_gsboth_kins[i]]] <- colnames(luad_prot_gs_5perc$outliers)[(luad_prot_gs_5perc$outliers[luad_prot_gsboth_kins[i], ] == 1) & !is.na(luad_prot_gs_5perc$outliers[luad_prot_gsboth_kins[i], ])] + luad_prot_gs_5perc$GS_neg_pairs[[luad_prot_gsboth_kins[i]]] <- colnames(luad_prot_gs_5perc$outliers)[(luad_prot_gs_5perc$outliers[luad_prot_gsboth_kins[i], ] == -1) & !is.na(luad_prot_gs_5perc$outliers[luad_prot_gsboth_kins[i], ])] +} + +ucec_prot_gsboth_kins <- rownames(ucec_prot_outliers_5perc$outliers[(rowSums(ucec_prot_outliers_5perc$outliers == -1, na.rm = T) > 0) & (rowSums(ucec_prot_outliers_5perc$outliers == 1, na.rm = T) > 0), ]) +ucec_prot_gs_5perc <- ucec_prot_outliers_5perc +for(i in 1:length(ucec_prot_gsboth_kins)){ + ucec_prot_gs_5perc$GS_pos_pairs[[ucec_prot_gsboth_kins[i]]] <- 
colnames(ucec_prot_gs_5perc$outliers)[(ucec_prot_gs_5perc$outliers[ucec_prot_gsboth_kins[i], ] == 1) & !is.na(ucec_prot_gs_5perc$outliers[ucec_prot_gsboth_kins[i], ])] + ucec_prot_gs_5perc$GS_neg_pairs[[ucec_prot_gsboth_kins[i]]] <- colnames(ucec_prot_gs_5perc$outliers)[(ucec_prot_gs_5perc$outliers[ucec_prot_gsboth_kins[i], ] == -1) & !is.na(ucec_prot_gs_5perc$outliers[ucec_prot_gsboth_kins[i], ])] +} +``` + +repeat for 2.5% threshold +```{r} +brca_prot_gsboth_kins_2pt5perc<- rownames(brca_prot_outliers_2pt5perc$outliers[(rowSums(brca_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & (rowSums(brca_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +brca_prot_gs_2pt5perc<- brca_prot_outliers_2pt5perc +for(i in 1:length(brca_prot_gsboth_kins_2pt5perc)){ + brca_prot_gs_2pt5perc$GS_pos_pairs[[brca_prot_gsboth_kins_2pt5perc[i]]] <- colnames(brca_prot_gs_2pt5perc$outliers)[(brca_prot_gs_2pt5perc$outliers[brca_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(brca_prot_gs_2pt5perc$outliers[brca_prot_gsboth_kins_2pt5perc[i], ])] + brca_prot_gs_2pt5perc$GS_neg_pairs[[brca_prot_gsboth_kins_2pt5perc[i]]] <- colnames(brca_prot_gs_2pt5perc$outliers)[(brca_prot_gs_2pt5perc$outliers[brca_prot_gsboth_kins_2pt5perc[i], ] == -1) & !is.na(brca_prot_gs_2pt5perc$outliers[brca_prot_gsboth_kins_2pt5perc[i], ])] +} + +ccrcc_prot_gsboth_kins_2pt5perc<- rownames(ccrcc_prot_outliers_2pt5perc$outliers[(rowSums(ccrcc_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & (rowSums(ccrcc_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +ccrcc_prot_gs_2pt5perc<- ccrcc_prot_outliers_2pt5perc +for(i in 1:length(ccrcc_prot_gsboth_kins_2pt5perc)){ + ccrcc_prot_gs_2pt5perc$GS_pos_pairs[[ccrcc_prot_gsboth_kins_2pt5perc[i]]] <- colnames(ccrcc_prot_gs_2pt5perc$outliers)[(ccrcc_prot_gs_2pt5perc$outliers[ccrcc_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(ccrcc_prot_gs_2pt5perc$outliers[ccrcc_prot_gsboth_kins_2pt5perc[i], ])] + 
ccrcc_prot_gs_2pt5perc$GS_neg_pairs[[ccrcc_prot_gsboth_kins_2pt5perc[i]]] <- colnames(ccrcc_prot_gs_2pt5perc$outliers)[(ccrcc_prot_gs_2pt5perc$outliers[ccrcc_prot_gsboth_kins_2pt5perc[i], ] == -1) & !is.na(ccrcc_prot_gs_2pt5perc$outliers[ccrcc_prot_gsboth_kins_2pt5perc[i], ])] +} + +gbm_prot_gsboth_kins_2pt5perc<- rownames(gbm_prot_outliers_2pt5perc$outliers[(rowSums(gbm_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & (rowSums(gbm_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +gbm_prot_gs_2pt5perc<- gbm_prot_outliers_2pt5perc +for(i in 1:length(gbm_prot_gsboth_kins_2pt5perc)){ + gbm_prot_gs_2pt5perc$GS_pos_pairs[[gbm_prot_gsboth_kins_2pt5perc[i]]] <- colnames(gbm_prot_gs_2pt5perc$outliers)[(gbm_prot_gs_2pt5perc$outliers[gbm_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(gbm_prot_gs_2pt5perc$outliers[gbm_prot_gsboth_kins_2pt5perc[i], ])] + gbm_prot_gs_2pt5perc$GS_neg_pairs[[gbm_prot_gsboth_kins_2pt5perc[i]]] <- colnames(gbm_prot_gs_2pt5perc$outliers)[(gbm_prot_gs_2pt5perc$outliers[gbm_prot_gsboth_kins_2pt5perc[i], ] == -1) & !is.na(gbm_prot_gs_2pt5perc$outliers[gbm_prot_gsboth_kins_2pt5perc[i], ])] +} + +hnscc_prot_gsboth_kins_2pt5perc<- rownames(hnscc_prot_outliers_2pt5perc$outliers[(rowSums(hnscc_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & (rowSums(hnscc_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +hnscc_prot_gs_2pt5perc<- hnscc_prot_outliers_2pt5perc +for(i in 1:length(hnscc_prot_gsboth_kins_2pt5perc)){ + hnscc_prot_gs_2pt5perc$GS_pos_pairs[[hnscc_prot_gsboth_kins_2pt5perc[i]]] <- colnames(hnscc_prot_gs_2pt5perc$outliers)[(hnscc_prot_gs_2pt5perc$outliers[hnscc_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(hnscc_prot_gs_2pt5perc$outliers[hnscc_prot_gsboth_kins_2pt5perc[i], ])] + hnscc_prot_gs_2pt5perc$GS_neg_pairs[[hnscc_prot_gsboth_kins_2pt5perc[i]]] <- colnames(hnscc_prot_gs_2pt5perc$outliers)[(hnscc_prot_gs_2pt5perc$outliers[hnscc_prot_gsboth_kins_2pt5perc[i], ] == -1) & 
!is.na(hnscc_prot_gs_2pt5perc$outliers[hnscc_prot_gsboth_kins_2pt5perc[i], ])] +} + +lscc_prot_gsboth_kins_2pt5perc<- rownames(lscc_prot_outliers_2pt5perc$outliers[(rowSums(lscc_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & (rowSums(lscc_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +lscc_prot_gs_2pt5perc<- lscc_prot_outliers_2pt5perc +for(i in 1:length(lscc_prot_gsboth_kins_2pt5perc)){ + lscc_prot_gs_2pt5perc$GS_pos_pairs[[lscc_prot_gsboth_kins_2pt5perc[i]]] <- colnames(lscc_prot_gs_2pt5perc$outliers)[(lscc_prot_gs_2pt5perc$outliers[lscc_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(lscc_prot_gs_2pt5perc$outliers[lscc_prot_gsboth_kins_2pt5perc[i], ])] + lscc_prot_gs_2pt5perc$GS_neg_pairs[[lscc_prot_gsboth_kins_2pt5perc[i]]] <- colnames(lscc_prot_gs_2pt5perc$outliers)[(lscc_prot_gs_2pt5perc$outliers[lscc_prot_gsboth_kins_2pt5perc[i], ] == -1) & !is.na(lscc_prot_gs_2pt5perc$outliers[lscc_prot_gsboth_kins_2pt5perc[i], ])] +} + +luad_prot_gsboth_kins_2pt5perc<- rownames(luad_prot_outliers_2pt5perc$outliers[(rowSums(luad_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & (rowSums(luad_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +luad_prot_gs_2pt5perc<- luad_prot_outliers_2pt5perc +for(i in 1:length(luad_prot_gsboth_kins_2pt5perc)){ + luad_prot_gs_2pt5perc$GS_pos_pairs[[luad_prot_gsboth_kins_2pt5perc[i]]] <- colnames(luad_prot_gs_2pt5perc$outliers)[(luad_prot_gs_2pt5perc$outliers[luad_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(luad_prot_gs_2pt5perc$outliers[luad_prot_gsboth_kins_2pt5perc[i], ])] + luad_prot_gs_2pt5perc$GS_neg_pairs[[luad_prot_gsboth_kins_2pt5perc[i]]] <- colnames(luad_prot_gs_2pt5perc$outliers)[(luad_prot_gs_2pt5perc$outliers[luad_prot_gsboth_kins_2pt5perc[i], ] == -1) & !is.na(luad_prot_gs_2pt5perc$outliers[luad_prot_gsboth_kins_2pt5perc[i], ])] +} + +ucec_prot_gsboth_kins_2pt5perc<- rownames(ucec_prot_outliers_2pt5perc$outliers[(rowSums(ucec_prot_outliers_2pt5perc$outliers == -1, na.rm = T) > 0) & 
(rowSums(ucec_prot_outliers_2pt5perc$outliers == 1, na.rm = T) > 0), ]) +ucec_prot_gs_2pt5perc<- ucec_prot_outliers_2pt5perc +for(i in 1:length(ucec_prot_gsboth_kins_2pt5perc)){ + ucec_prot_gs_2pt5perc$GS_pos_pairs[[ucec_prot_gsboth_kins_2pt5perc[i]]] <- colnames(ucec_prot_gs_2pt5perc$outliers)[(ucec_prot_gs_2pt5perc$outliers[ucec_prot_gsboth_kins_2pt5perc[i], ] == 1) & !is.na(ucec_prot_gs_2pt5perc$outliers[ucec_prot_gsboth_kins_2pt5perc[i], ])] + ucec_prot_gs_2pt5perc$GS_neg_pairs[[ucec_prot_gsboth_kins_2pt5perc[i]]] <- colnames(ucec_prot_gs_2pt5perc$outliers)[(ucec_prot_gs_2pt5perc$outliers[ucec_prot_gsboth_kins_2pt5perc[i], ] == -1) & !is.na(ucec_prot_gs_2pt5perc$outliers[ucec_prot_gsboth_kins_2pt5perc[i], ])] +} +``` + +repeat for 10% threshold +```{r} +brca_prot_gsboth_kins_10perc<- rownames(brca_prot_outliers_10perc$outliers[(rowSums(brca_prot_outliers_10perc$outliers == -1, na.rm = T) > 0) & (rowSums(brca_prot_outliers_10perc$outliers == 1, na.rm = T) > 0), ]) +brca_prot_gs_10perc<- brca_prot_outliers_10perc +for(i in 1:length(brca_prot_gsboth_kins_10perc)){ + brca_prot_gs_10perc$GS_pos_pairs[[brca_prot_gsboth_kins_10perc[i]]] <- colnames(brca_prot_gs_10perc$outliers)[(brca_prot_gs_10perc$outliers[brca_prot_gsboth_kins_10perc[i], ] == 1) & !is.na(brca_prot_gs_10perc$outliers[brca_prot_gsboth_kins_10perc[i], ])] + brca_prot_gs_10perc$GS_neg_pairs[[brca_prot_gsboth_kins_10perc[i]]] <- colnames(brca_prot_gs_10perc$outliers)[(brca_prot_gs_10perc$outliers[brca_prot_gsboth_kins_10perc[i], ] == -1) & !is.na(brca_prot_gs_10perc$outliers[brca_prot_gsboth_kins_10perc[i], ])] +} + +ccrcc_prot_gsboth_kins_10perc<- rownames(ccrcc_prot_outliers_10perc$outliers[(rowSums(ccrcc_prot_outliers_10perc$outliers == -1, na.rm = T) > 0) & (rowSums(ccrcc_prot_outliers_10perc$outliers == 1, na.rm = T) > 0), ]) +ccrcc_prot_gs_10perc<- ccrcc_prot_outliers_10perc +for(i in 1:length(ccrcc_prot_gsboth_kins_10perc)){ + ccrcc_prot_gs_10perc$GS_pos_pairs[[ccrcc_prot_gsboth_kins_10perc[i]]] 
<- colnames(ccrcc_prot_gs_10perc$outliers)[(ccrcc_prot_gs_10perc$outliers[ccrcc_prot_gsboth_kins_10perc[i], ] == 1) & !is.na(ccrcc_prot_gs_10perc$outliers[ccrcc_prot_gsboth_kins_10perc[i], ])] + ccrcc_prot_gs_10perc$GS_neg_pairs[[ccrcc_prot_gsboth_kins_10perc[i]]] <- colnames(ccrcc_prot_gs_10perc$outliers)[(ccrcc_prot_gs_10perc$outliers[ccrcc_prot_gsboth_kins_10perc[i], ] == -1) & !is.na(ccrcc_prot_gs_10perc$outliers[ccrcc_prot_gsboth_kins_10perc[i], ])] +} + +gbm_prot_gsboth_kins_10perc<- rownames(gbm_prot_outliers_10perc$outliers[(rowSums(gbm_prot_outliers_10perc$outliers == -1, na.rm = T) > 0) & (rowSums(gbm_prot_outliers_10perc$outliers == 1, na.rm = T) > 0), ]) +gbm_prot_gs_10perc<- gbm_prot_outliers_10perc +for(i in 1:length(gbm_prot_gsboth_kins_10perc)){ + gbm_prot_gs_10perc$GS_pos_pairs[[gbm_prot_gsboth_kins_10perc[i]]] <- colnames(gbm_prot_gs_10perc$outliers)[(gbm_prot_gs_10perc$outliers[gbm_prot_gsboth_kins_10perc[i], ] == 1) & !is.na(gbm_prot_gs_10perc$outliers[gbm_prot_gsboth_kins_10perc[i], ])] + gbm_prot_gs_10perc$GS_neg_pairs[[gbm_prot_gsboth_kins_10perc[i]]] <- colnames(gbm_prot_gs_10perc$outliers)[(gbm_prot_gs_10perc$outliers[gbm_prot_gsboth_kins_10perc[i], ] == -1) & !is.na(gbm_prot_gs_10perc$outliers[gbm_prot_gsboth_kins_10perc[i], ])] +} + +hnscc_prot_gsboth_kins_10perc<- rownames(hnscc_prot_outliers_10perc$outliers[(rowSums(hnscc_prot_outliers_10perc$outliers == -1, na.rm = T) > 0) & (rowSums(hnscc_prot_outliers_10perc$outliers == 1, na.rm = T) > 0), ]) +hnscc_prot_gs_10perc<- hnscc_prot_outliers_10perc +for(i in 1:length(hnscc_prot_gsboth_kins_10perc)){ + hnscc_prot_gs_10perc$GS_pos_pairs[[hnscc_prot_gsboth_kins_10perc[i]]] <- colnames(hnscc_prot_gs_10perc$outliers)[(hnscc_prot_gs_10perc$outliers[hnscc_prot_gsboth_kins_10perc[i], ] == 1) & !is.na(hnscc_prot_gs_10perc$outliers[hnscc_prot_gsboth_kins_10perc[i], ])] + hnscc_prot_gs_10perc$GS_neg_pairs[[hnscc_prot_gsboth_kins_10perc[i]]] <- 
colnames(hnscc_prot_gs_10perc$outliers)[(hnscc_prot_gs_10perc$outliers[hnscc_prot_gsboth_kins_10perc[i], ] == -1) & !is.na(hnscc_prot_gs_10perc$outliers[hnscc_prot_gsboth_kins_10perc[i], ])]
}

# ---- helpers for the protein-level gold-standard (GS) sets -----------------

# Row names (kinases) of `outlier_set$outliers` that have at least one sample
# flagged -1 AND at least one sample flagged 1.
# Indexing rownames() directly (instead of rownames(m[cond, ])) keeps the name
# even when exactly one row qualifies and `outliers` is a matrix.
prot_gs_kinases <- function(outlier_set) {
  calls <- outlier_set$outliers
  has_low <- rowSums(calls == -1, na.rm = TRUE) > 0
  has_high <- rowSums(calls == 1, na.rm = TRUE) > 0
  rownames(calls)[has_low & has_high]
}

# Copy `outlier_set` and, for every kinase in `kinases`, store the samples
# flagged 1 in `GS_pos_pairs[[kinase]]` and the samples flagged -1 in
# `GS_neg_pairs[[kinase]]`. Iterating over the values (not 1:length()) makes
# an empty `kinases` vector a no-op instead of indexing with NA.
add_prot_gs_pairs <- function(outlier_set, kinases) {
  gs <- outlier_set
  for (kin in kinases) {
    calls <- gs$outliers[kin, ]
    gs$GS_pos_pairs[[kin]] <- colnames(gs$outliers)[(calls == 1) & !is.na(calls)]
    gs$GS_neg_pairs[[kin]] <- colnames(gs$outliers)[(calls == -1) & !is.na(calls)]
  }
  gs
}

lscc_prot_gsboth_kins_10perc <- prot_gs_kinases(lscc_prot_outliers_10perc)
lscc_prot_gs_10perc <- add_prot_gs_pairs(lscc_prot_outliers_10perc, lscc_prot_gsboth_kins_10perc)

luad_prot_gsboth_kins_10perc <- prot_gs_kinases(luad_prot_outliers_10perc)
luad_prot_gs_10perc <- add_prot_gs_pairs(luad_prot_outliers_10perc, luad_prot_gsboth_kins_10perc)

ucec_prot_gsboth_kins_10perc <- prot_gs_kinases(ucec_prot_outliers_10perc)
ucec_prot_gs_10perc <- add_prot_gs_pairs(ucec_prot_outliers_10perc, ucec_prot_gsboth_kins_10perc)
```

repeat for 15% threshold
```{r}
brca_prot_gsboth_kins_15perc <- prot_gs_kinases(brca_prot_outliers_15perc)
brca_prot_gs_15perc <- add_prot_gs_pairs(brca_prot_outliers_15perc, brca_prot_gsboth_kins_15perc)

ccrcc_prot_gsboth_kins_15perc <- prot_gs_kinases(ccrcc_prot_outliers_15perc)
ccrcc_prot_gs_15perc <- add_prot_gs_pairs(ccrcc_prot_outliers_15perc, ccrcc_prot_gsboth_kins_15perc)

gbm_prot_gsboth_kins_15perc <- prot_gs_kinases(gbm_prot_outliers_15perc)
gbm_prot_gs_15perc <- add_prot_gs_pairs(gbm_prot_outliers_15perc, gbm_prot_gsboth_kins_15perc)

hnscc_prot_gsboth_kins_15perc <- prot_gs_kinases(hnscc_prot_outliers_15perc)
hnscc_prot_gs_15perc <- add_prot_gs_pairs(hnscc_prot_outliers_15perc, hnscc_prot_gsboth_kins_15perc)

lscc_prot_gsboth_kins_15perc <- prot_gs_kinases(lscc_prot_outliers_15perc)
lscc_prot_gs_15perc <- add_prot_gs_pairs(lscc_prot_outliers_15perc, lscc_prot_gsboth_kins_15perc)

luad_prot_gsboth_kins_15perc <- prot_gs_kinases(luad_prot_outliers_15perc)
luad_prot_gs_15perc <- add_prot_gs_pairs(luad_prot_outliers_15perc, luad_prot_gsboth_kins_15perc)

ucec_prot_gsboth_kins_15perc <- prot_gs_kinases(ucec_prot_outliers_15perc)
ucec_prot_gs_15perc <- add_prot_gs_pairs(ucec_prot_outliers_15perc, ucec_prot_gsboth_kins_15perc)
```

```{r}
# One named list per threshold (names = cancer types), written to GSsets/.
combinedGS_5per <- list(
  BRCA = brca_prot_gs_5perc,
  CCRCC = ccrcc_prot_gs_5perc,
  GBM = gbm_prot_gs_5perc,
  HNSCC = hnscc_prot_gs_5perc,
  LSCC = lscc_prot_gs_5perc,
  LUAD = luad_prot_gs_5perc,
  UCEC = ucec_prot_gs_5perc
)
saveRDS(combinedGS_5per, "GSsets/protein_5percent.Rds")

combinedGS_2pt5per <- list(
  BRCA = brca_prot_gs_2pt5perc,
  CCRCC = ccrcc_prot_gs_2pt5perc,
  GBM = gbm_prot_gs_2pt5perc,
  HNSCC = hnscc_prot_gs_2pt5perc,
  LSCC = lscc_prot_gs_2pt5perc,
  LUAD = luad_prot_gs_2pt5perc,
  UCEC = ucec_prot_gs_2pt5perc
)
saveRDS(combinedGS_2pt5per, "GSsets/protein_2pt5percent.Rds")

combinedGS_10per <- list(
  BRCA = brca_prot_gs_10perc,
  CCRCC = ccrcc_prot_gs_10perc,
  GBM = gbm_prot_gs_10perc,
  HNSCC = hnscc_prot_gs_10perc,
  LSCC = lscc_prot_gs_10perc,
  LUAD = luad_prot_gs_10perc,
  UCEC = ucec_prot_gs_10perc
)
saveRDS(combinedGS_10per, "GSsets/protein_10percent.Rds")

combinedGS_15per <- list(
  BRCA = brca_prot_gs_15perc,
  CCRCC = ccrcc_prot_gs_15perc,
  GBM = gbm_prot_gs_15perc,
  HNSCC = hnscc_prot_gs_15perc,
  LSCC = lscc_prot_gs_15perc,
  LUAD = luad_prot_gs_15perc,
  UCEC = ucec_prot_gs_15perc
)
saveRDS(combinedGS_15per, "GSsets/protein_15percent.Rds")
```


II. construct activating site based benchmarks

load brca phospho data, filter data to activating sites and identify outliers
```{r warning=FALSE}
# The `idx` column is a "|"-separated identifier: the first field is matched
# against kins_mapped_act1$gene and the third field is used as the site label.
idx_gene_id <- function(idx) sub("\\|.*", "", idx)
idx_site <- function(idx) sub("\\|.*", "", sub("^[^|]*\\|[^|]*\\|", "", idx))

brca_phos <- read.table(
  "data_refresh/BRCA/BRCA_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
)
# Treat NaN and infinite abundances as missing. +Inf is scrubbed as well so
# every cancer type is handled the same way (the GBM code already did this).
brca_phos[brca_phos == "NaN"] <- NA
brca_phos[brca_phos == Inf] <- NA
brca_phos[brca_phos == -Inf] <- NA

# Rows whose gene id belongs to a kinase with a known activating site;
# `site` becomes "<gene_name>x<site>".
brca_phos_kins <- brca_phos[idx_gene_id(brca_phos$idx) %in% kins_mapped_act1$gene, ]
brca_phos_kins$gene <- kins_mapped_act1[idx_gene_id(brca_phos_kins$idx), "gene_name"]
brca_phos_kins$site <- paste0(brca_phos_kins$gene, "x", idx_site(brca_phos_kins$idx))

unique(brca_phos_kins$gene)

# Keep only annotated activating sites, key the rows by site and drop the
# annotation columns so that only sample columns remain.
brca_phos_kins1 <- brca_phos_kins[brca_phos_kins$site %in% kinase_reg_sites_act$Site2, ]
rownames(brca_phos_kins1) <- brca_phos_kins1$site
brca_act_site_kins <- unique(brca_phos_kins1$gene)
brca_phos_kins1 <- brca_phos_kins1[, !colnames(brca_phos_kins1) %in% c("idx", "gene", "site")]

# Every sample is both tested and used as reference; 1.645 ~ qnorm(0.95).
brca_act_site_outlier_df <- outlieR_plus(
  brca_phos_kins1,
  testing_samples = colnames(brca_phos_kins1),
  reference_samples = colnames(brca_phos_kins1),
  z_thresh = 1.645, z_method = "normal"
)
```

apply to other cancer types
```{r warning=FALSE}
ccrcc_phos <- read.table(
  "data_refresh/CCRCC/CCRCC_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
)
ccrcc_phos[ccrcc_phos == "NaN"] <- NA
ccrcc_phos[ccrcc_phos == Inf] <- NA
ccrcc_phos[ccrcc_phos == -Inf] <- NA
ccrcc_phos_kins <- ccrcc_phos[sub("\\|.*", "", ccrcc_phos$idx) %in% kins_mapped_act1$gene, ]
ccrcc_phos_kins$gene <- kins_mapped_act1[sub("\\|.*", "", ccrcc_phos_kins$idx), "gene_name"]
ccrcc_phos_kins$site <- sub("^[^|]*\\|[^|]*\\|", "", ccrcc_phos_kins$idx)
ccrcc_phos_kins$site <- sub("\\|.*", "", ccrcc_phos_kins$site)
ccrcc_phos_kins$site <-
paste0(ccrcc_phos_kins$gene, "x", ccrcc_phos_kins$site)

# ---- helpers: the per-cancer phospho pipeline is identical, so it is written once ----

# Replace NaN and infinite abundances with NA. +Inf is scrubbed for every
# cancer type (previously only the GBM code did this).
scrub_non_finite <- function(phos) {
  phos[phos == "NaN"] <- NA
  phos[phos == Inf] <- NA
  phos[phos == -Inf] <- NA
  phos
}

# Keep rows whose first "|"-field of `idx` is in kins_mapped_act1$gene and add
# `gene` (gene_name looked up by row name) and `site` ("<gene>x<third field>").
# Reads the global `kins_mapped_act1`.
annotate_kinase_sites <- function(phos) {
  phos_kins <- phos[sub("\\|.*", "", phos$idx) %in% kins_mapped_act1$gene, ]
  phos_kins$gene <- kins_mapped_act1[sub("\\|.*", "", phos_kins$idx), "gene_name"]
  site <- sub("^[^|]*\\|[^|]*\\|", "", phos_kins$idx)
  site <- sub("\\|.*", "", site)
  phos_kins$site <- paste0(phos_kins$gene, "x", site)
  phos_kins
}

# Restrict to sites listed in kinase_reg_sites_act$Site2, use the site label as
# row name and drop the annotation columns so only sample columns remain.
# drop = FALSE keeps a data frame even if a single sample column is left.
keep_activating_sites <- function(phos_kins) {
  act <- phos_kins[phos_kins$site %in% kinase_reg_sites_act$Site2, ]
  rownames(act) <- act$site
  act[, !colnames(act) %in% c("idx", "gene", "site"), drop = FALSE]
}

# Call outliers with every sample used as both test and reference sample.
# The default z threshold 1.645 ~ qnorm(0.95).
call_act_site_outliers <- function(act_sites, z_thresh = 1.645) {
  outlieR_plus(
    act_sites,
    testing_samples = colnames(act_sites),
    reference_samples = colnames(act_sites),
    z_thresh = z_thresh, z_method = "normal"
  )
}

ccrcc_phos_kins1 <- keep_activating_sites(ccrcc_phos_kins)
ccrcc_act_site_outlier_df <- call_act_site_outliers(ccrcc_phos_kins1)

gbm_phos <- scrub_non_finite(read.table(
  "data_refresh/GBM/GBM_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
))
gbm_phos_kins <- annotate_kinase_sites(gbm_phos)
gbm_phos_kins1 <- keep_activating_sites(gbm_phos_kins)
gbm_act_site_outlier_df <- call_act_site_outliers(gbm_phos_kins1)

hnscc_phos <- scrub_non_finite(read.table(
  "data_refresh/HNSCC//HNSCC_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
))
hnscc_phos_kins <- annotate_kinase_sites(hnscc_phos)
hnscc_phos_kins1 <- keep_activating_sites(hnscc_phos_kins)
hnscc_act_site_outlier_df <- call_act_site_outliers(hnscc_phos_kins1)

lscc_phos <- scrub_non_finite(read.table(
  "data_refresh/LSCC/LSCC_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
))
lscc_phos_kins <- annotate_kinase_sites(lscc_phos)
lscc_phos_kins1 <- keep_activating_sites(lscc_phos_kins)
lscc_act_site_outlier_df <- call_act_site_outliers(lscc_phos_kins1)

luad_phos <- read.table(
  "data_refresh/LUAD/LUAD_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
)
#luad_phos2 <- read.table("phospho data/LUAD_phospho_site_abundance_log2_reference_intensity_normalized_isoform_adjusted_Tumor_no_inf.cct", stringsAsFactors = F, header = T, sep = "\t")
#colnames(luad_phos)[1] <- "idx"
luad_phos <- scrub_non_finite(luad_phos)
luad_phos_kins <- annotate_kinase_sites(luad_phos)
luad_phos_kins1 <- keep_activating_sites(luad_phos_kins)
luad_act_site_outlier_df <- call_act_site_outliers(luad_phos_kins1)

ucec_phos <- scrub_non_finite(read.table(
  "data_refresh/UCEC/UCEC_phospho_site_abundance_log2_reference_intensity_normalized_Tumor.txt",
  stringsAsFactors = FALSE, header = TRUE, sep = "\t"
))
ucec_phos_kins <- annotate_kinase_sites(ucec_phos)
ucec_phos_kins1 <- keep_activating_sites(ucec_phos_kins)
ucec_act_site_outlier_df <- call_act_site_outliers(ucec_phos_kins1)
```

filter data to sites with at least 30 measurements and variance > 0.2
```{r warning=F}
# Sites measured in at least `min_obs` samples whose across-sample variance
# exceeds `min_var`. Sites with an NA variance drop out in the intersect().
select_informative_sites <- function(act_sites, site_var, min_obs = 30, min_var = 0.2) {
  intersect(
    rownames(act_sites)[rowSums(!is.na(act_sites)) >= min_obs],
    names(site_var)[site_var > min_var]
  )
}

# Restrict the Zscores, DEscores and outliers tables of an outlier result to
# `sites`. drop = FALSE keeps them two-dimensional when one site remains, so
# the rowSums() calls below keep working.
subset_outlier_set <- function(outlier_set, sites) {
  for (slot in c("Zscores", "DEscores", "outliers")) {
    outlier_set[[slot]] <- outlier_set[[slot]][sites, , drop = FALSE]
  }
  outlier_set
}

brca_phos_kins_var <- apply(brca_phos_kins1, 1, var, na.rm = TRUE)
brca_act_site_filtered <- select_informative_sites(brca_phos_kins1, brca_phos_kins_var)
brca_actsite_outliers_5perc <- subset_outlier_set(brca_act_site_outlier_df, brca_act_site_filtered)
# Total number of high / low outlier calls that survive the filter.
sum(rowSums(brca_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(brca_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))

hnscc_phos_kins_var <- apply(hnscc_phos_kins1, 1, var, na.rm = TRUE)
hnscc_act_site_filtered <- select_informative_sites(hnscc_phos_kins1, hnscc_phos_kins_var)
hnscc_actsite_outliers_5perc <- subset_outlier_set(hnscc_act_site_outlier_df, hnscc_act_site_filtered)
sum(rowSums(hnscc_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(hnscc_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))

ccrcc_phos_kins_var <- apply(ccrcc_phos_kins1, 1, var, na.rm = TRUE)
ccrcc_act_site_filtered <- select_informative_sites(ccrcc_phos_kins1, ccrcc_phos_kins_var)
ccrcc_actsite_outliers_5perc <-
ccrcc_act_site_outlier_df

# ---- helpers ---------------------------------------------------------------

# Sites measured in at least `min_obs` samples whose across-sample variance
# exceeds `min_var`. Sites with an NA variance drop out in the intersect().
select_informative_sites <- function(act_sites, site_var, min_obs = 30, min_var = 0.2) {
  intersect(
    rownames(act_sites)[rowSums(!is.na(act_sites)) >= min_obs],
    names(site_var)[site_var > min_var]
  )
}

# Restrict the Zscores, DEscores and outliers tables of an outlier result to
# `sites`. drop = FALSE keeps them two-dimensional when one site remains, so
# the rowSums() calls below keep working.
subset_outlier_set <- function(outlier_set, sites) {
  for (slot in c("Zscores", "DEscores", "outliers")) {
    outlier_set[[slot]] <- outlier_set[[slot]][sites, , drop = FALSE]
  }
  outlier_set
}

# Re-call outliers at another z threshold (every sample is both test and
# reference sample) and keep only the `sites` rows of the outlier calls.
# As in the original code, Zscores/DEscores are left unfiltered for these sets.
threshold_act_site_outliers <- function(act_sites, sites, z_thresh) {
  res <- outlieR_plus(
    act_sites,
    testing_samples = colnames(act_sites),
    reference_samples = colnames(act_sites),
    z_method = "normal", z_thresh = z_thresh
  )
  res$outliers <- res$outliers[sites, , drop = FALSE]
  res
}

ccrcc_actsite_outliers_5perc <- subset_outlier_set(ccrcc_actsite_outliers_5perc, ccrcc_act_site_filtered)
# Total number of high / low outlier calls that survive the filter.
sum(rowSums(ccrcc_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(ccrcc_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))

gbm_phos_kins_var <- apply(gbm_phos_kins1, 1, var, na.rm = TRUE)
gbm_act_site_filtered <- select_informative_sites(gbm_phos_kins1, gbm_phos_kins_var)
gbm_actsite_outliers_5perc <- subset_outlier_set(gbm_act_site_outlier_df, gbm_act_site_filtered)
sum(rowSums(gbm_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(gbm_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))

lscc_phos_kins_var <- apply(lscc_phos_kins1, 1, var, na.rm = TRUE)
lscc_act_site_filtered <- select_informative_sites(lscc_phos_kins1, lscc_phos_kins_var)
lscc_actsite_outliers_5perc <- subset_outlier_set(lscc_act_site_outlier_df, lscc_act_site_filtered)
sum(rowSums(lscc_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(lscc_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))

luad_phos_kins_var <- apply(luad_phos_kins1, 1, var, na.rm = TRUE)
luad_act_site_filtered <- select_informative_sites(luad_phos_kins1, luad_phos_kins_var)
luad_actsite_outliers_5perc <- subset_outlier_set(luad_act_site_outlier_df, luad_act_site_filtered)
sum(rowSums(luad_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(luad_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))

ucec_phos_kins_var <- apply(ucec_phos_kins1, 1, var, na.rm = TRUE)
ucec_act_site_filtered <- select_informative_sites(ucec_phos_kins1, ucec_phos_kins_var)
ucec_actsite_outliers_5perc <- subset_outlier_set(ucec_act_site_outlier_df, ucec_act_site_filtered)
sum(rowSums(ucec_actsite_outliers_5perc$outliers == 1, na.rm = TRUE))
sum(rowSums(ucec_actsite_outliers_5perc$outliers == -1, na.rm = TRUE))
```

repeat for 2.5% threshold
```{r}
# z_thresh = 1.96 ~ qnorm(0.975)
brca_actsite_outliers_2pt5perc <- threshold_act_site_outliers(brca_phos_kins1, brca_act_site_filtered, z_thresh = 1.96)

ccrcc_actsite_outliers_2pt5perc <- threshold_act_site_outliers(ccrcc_phos_kins1, ccrcc_act_site_filtered, z_thresh = 1.96)

gbm_actsite_outliers_2pt5perc <- threshold_act_site_outliers(gbm_phos_kins1, gbm_act_site_filtered, z_thresh = 1.96)

hnscc_actsite_outliers_2pt5perc <- threshold_act_site_outliers(hnscc_phos_kins1, hnscc_act_site_filtered, z_thresh = 1.96)

lscc_actsite_outliers_2pt5perc <- threshold_act_site_outliers(lscc_phos_kins1, lscc_act_site_filtered, z_thresh = 1.96)

luad_actsite_outliers_2pt5perc <- threshold_act_site_outliers(luad_phos_kins1, luad_act_site_filtered, z_thresh = 1.96)

ucec_actsite_outliers_2pt5perc <- threshold_act_site_outliers(ucec_phos_kins1, ucec_act_site_filtered, z_thresh = 1.96)
```

repeat for 10% threshold
```{r}
# z_thresh = 1.282 ~ qnorm(0.90)
brca_actsite_outliers_10perc <- threshold_act_site_outliers(brca_phos_kins1, brca_act_site_filtered, z_thresh = 1.282)

ccrcc_actsite_outliers_10perc <- threshold_act_site_outliers(ccrcc_phos_kins1, ccrcc_act_site_filtered, z_thresh = 1.282)

gbm_actsite_outliers_10perc <- threshold_act_site_outliers(gbm_phos_kins1, gbm_act_site_filtered, z_thresh = 1.282)

hnscc_actsite_outliers_10perc <- threshold_act_site_outliers(hnscc_phos_kins1, hnscc_act_site_filtered, z_thresh = 1.282)

lscc_actsite_outliers_10perc <- threshold_act_site_outliers(lscc_phos_kins1, lscc_act_site_filtered, z_thresh = 1.282)

luad_actsite_outliers_10perc <- threshold_act_site_outliers(luad_phos_kins1, luad_act_site_filtered, z_thresh = 1.282)

ucec_actsite_outliers_10perc <- threshold_act_site_outliers(ucec_phos_kins1, ucec_act_site_filtered, z_thresh = 1.282)
```

repeat for 15% threshold
```{r}
# z_thresh = 1.036 ~ qnorm(0.85)
brca_actsite_outliers_15perc <- threshold_act_site_outliers(brca_phos_kins1, brca_act_site_filtered, z_thresh = 1.036)

ccrcc_actsite_outliers_15perc <- threshold_act_site_outliers(ccrcc_phos_kins1, ccrcc_act_site_filtered, z_thresh = 1.036)

gbm_actsite_outliers_15perc <- threshold_act_site_outliers(gbm_phos_kins1, gbm_act_site_filtered, z_thresh = 1.036)

hnscc_actsite_outliers_15perc <- threshold_act_site_outliers(hnscc_phos_kins1, hnscc_act_site_filtered, z_thresh = 1.036)

lscc_actsite_outliers_15perc <- threshold_act_site_outliers(lscc_phos_kins1, lscc_act_site_filtered, z_thresh = 1.036)

luad_actsite_outliers_15perc <- threshold_act_site_outliers(luad_phos_kins1, luad_act_site_filtered, z_thresh = 1.036)

ucec_actsite_outliers_15perc <- threshold_act_site_outliers(ucec_phos_kins1, ucec_act_site_filtered, z_thresh = 1.036)
```


for kinases with multiple activating sites,
only keep high/low outlier samples for a given site if data for other sites is concordant (mean of the other sites also in top/bottom quartile); function adds lists of GS-positive and GS-negative samples for multiple site outliers that meet criteria for inclusion to list for single site outliers
```{r}
# Build per-kinase gold-standard (GS) sample lists from site-level outlier calls.
#
# Arguments
#   filt_sites       character vector of site labels "<kinase>x<site>"; each
#                    must be a row name of extract_gs_filt$outliers (and of
#                    $Zscores when the kinase has several sites). May be empty.
#   extract_gs_filt  list with `outliers` (sites x samples; 1 = high outlier,
#                    -1 = low outlier, NA allowed) and `Zscores` (sites x
#                    samples), both with row and column names.
#   concordance_z    minimum mean z-score (in the direction of the call) that
#                    the kinase's OTHER sites must reach for a call to be kept.
#                    The default 0.675 ~ qnorm(0.75), i.e. the top/bottom quartile.
#
# Value
#   `extract_gs_filt` with `GS_pos_pairs` and `GS_neg_pairs` replaced by named
#   lists (names = kinase, i.e. the site label with everything from the first
#   "x" onwards removed) of sample names. Multi-site kinases come first, then
#   single-site kinases.
#
# Rules
#   * kinase with one site in `filt_sites`: every sample flagged 1 / -1 is kept.
#   * kinase with several sites: a sample flagged at one site is kept only if
#     the mean z-score over the kinase's other sites is >= concordance_z
#     (high calls) or <= -concordance_z (low calls), or if that mean is NA
#     (no measurement at any other site).
multi_site_adj_GS_pairs <- function(filt_sites, extract_gs_filt, concordance_z = 0.675) {
  calls <- extract_gs_filt$outliers
  zscores <- extract_gs_filt$Zscores

  # Samples carrying `flag` (1 or -1) at one site; NA calls are ignored.
  flagged_samples <- function(site, flag) {
    site_calls <- calls[site, , drop = FALSE]
    hits <- colnames(site_calls)[site_calls == flag]
    hits[!is.na(hits)]
  }

  # Subset of `samples` whose mean z-score over `other_sites` agrees with the
  # call direction (+1 high, -1 low). vapply() over an empty `samples` simply
  # returns nothing, so no 1:length() guard is needed.
  concordant_samples <- function(samples, other_sites, direction) {
    keep <- vapply(samples, function(smp) {
      other_mean <- mean(zscores[other_sites, smp], na.rm = TRUE)
      is.na(other_mean) || direction * other_mean >= concordance_z
    }, logical(1))
    samples[keep]
  }

  site_kinase <- sub("x.*", "", filt_sites)
  # unique(): a kinase with three or more sites must be processed only once.
  multi_kinases <- unique(site_kinase[duplicated(site_kinase)])
  single_sites <- unique(filt_sites[!site_kinase %in% multi_kinases])

  pos_pairs <- list()
  neg_pairs <- list()

  for (kin in multi_kinases) {
    kin_sites <- filt_sites[site_kinase == kin]
    outs <- character()
    norms <- character()
    for (j in seq_along(kin_sites)) {
      others <- kin_sites[-j]
      outs <- c(outs, concordant_samples(flagged_samples(kin_sites[j], 1), others, 1))
      norms <- c(norms, concordant_samples(flagged_samples(kin_sites[j], -1), others, -1))
    }
    pos_pairs[[kin]] <- unique(outs)
    neg_pairs[[kin]] <- unique(norms)
  }

  # Iterating over the values makes "no single-site kinase" a no-op; the old
  # 1:length(sing_sites) loop indexed with NA in that case.
  for (site in single_sites) {
    kin <- sub("x.*", "", site)
    pos_pairs[[kin]] <- flagged_samples(site, 1)
    neg_pairs[[kin]] <- flagged_samples(site, -1)
  }

  extract_gs_filt$GS_pos_pairs <- pos_pairs
  extract_gs_filt$GS_neg_pairs <- neg_pairs
  extract_gs_filt
}
```

apply to BRCA and apply to other cancer types
```{r warning=F}
brca_filt_sites_5perc <- rownames(brca_actsite_outliers_5perc$outliers)[rowSums(brca_actsite_outliers_5perc$outliers==1, na.rm = T) > 0 & rowSums(brca_actsite_outliers_5perc$outliers== -1, na.rm = T) > 0]
brca_filt_sites_5perc <- brca_filt_sites_5perc[order(brca_filt_sites_5perc)]
brca_actsite_outliers_5perc <- multi_site_adj_GS_pairs(brca_filt_sites_5perc, brca_actsite_outliers_5perc)

brca_filt_sites_15perc <- rownames(brca_actsite_outliers_15perc$outliers)[rowSums(brca_actsite_outliers_15perc$outliers==1, na.rm = T) > 0 & rowSums(brca_actsite_outliers_15perc$outliers== -1, na.rm = T) > 0]
brca_filt_sites_15perc <- brca_filt_sites_15perc[order(brca_filt_sites_15perc)]
brca_actsite_outliers_15perc <- multi_site_adj_GS_pairs(brca_filt_sites_15perc, brca_actsite_outliers_15perc)

ccrcc_filt_sites_5perc <- rownames(ccrcc_actsite_outliers_5perc$outliers)[rowSums(ccrcc_actsite_outliers_5perc$outliers==1, na.rm = T) > 0 & rowSums(ccrcc_actsite_outliers_5perc$outliers== -1, na.rm = T) > 0]
ccrcc_filt_sites_5perc <- ccrcc_filt_sites_5perc[order(ccrcc_filt_sites_5perc)]
ccrcc_actsite_outliers_5perc <- multi_site_adj_GS_pairs(ccrcc_filt_sites_5perc, ccrcc_actsite_outliers_5perc)

ccrcc_filt_sites_15perc <- rownames(ccrcc_actsite_outliers_15perc$outliers)[rowSums(ccrcc_actsite_outliers_15perc$outliers==1, na.rm = T) > 0 & rowSums(ccrcc_actsite_outliers_15perc$outliers== -1, na.rm = T) > 0]
ccrcc_filt_sites_15perc <-
ccrcc_filt_sites_15perc[order(ccrcc_filt_sites_15perc)]
ccrcc_actsite_outliers_15perc <- multi_site_adj_GS_pairs(ccrcc_filt_sites_15perc, ccrcc_actsite_outliers_15perc)

# Site labels (row names of `outlier_set$outliers`) that have at least one
# sample flagged 1 and at least one sample flagged -1, in sorted order.
sites_with_high_and_low <- function(outlier_set) {
  calls <- outlier_set$outliers
  has_high <- rowSums(calls == 1, na.rm = TRUE) > 0
  has_low <- rowSums(calls == -1, na.rm = TRUE) > 0
  picked <- rownames(calls)[has_high & has_low]
  picked[order(picked)]
}

gbm_filt_sites_5perc <- sites_with_high_and_low(gbm_actsite_outliers_5perc)
gbm_actsite_outliers_5perc <- multi_site_adj_GS_pairs(gbm_filt_sites_5perc, gbm_actsite_outliers_5perc)

gbm_filt_sites_15perc <- sites_with_high_and_low(gbm_actsite_outliers_15perc)
gbm_actsite_outliers_15perc <- multi_site_adj_GS_pairs(gbm_filt_sites_15perc, gbm_actsite_outliers_15perc)

hnscc_filt_sites_5perc <- sites_with_high_and_low(hnscc_actsite_outliers_5perc)
hnscc_actsite_outliers_5perc <- multi_site_adj_GS_pairs(hnscc_filt_sites_5perc, hnscc_actsite_outliers_5perc)

hnscc_filt_sites_15perc <- sites_with_high_and_low(hnscc_actsite_outliers_15perc)
hnscc_actsite_outliers_15perc <- multi_site_adj_GS_pairs(hnscc_filt_sites_15perc, hnscc_actsite_outliers_15perc)

lscc_filt_sites_5perc <- sites_with_high_and_low(lscc_actsite_outliers_5perc)
lscc_actsite_outliers_5perc <- multi_site_adj_GS_pairs(lscc_filt_sites_5perc, lscc_actsite_outliers_5perc)

lscc_filt_sites_15perc <- sites_with_high_and_low(lscc_actsite_outliers_15perc)
lscc_actsite_outliers_15perc <- multi_site_adj_GS_pairs(lscc_filt_sites_15perc, lscc_actsite_outliers_15perc)

luad_filt_sites_5perc <- sites_with_high_and_low(luad_actsite_outliers_5perc)
luad_actsite_outliers_5perc <- multi_site_adj_GS_pairs(luad_filt_sites_5perc, luad_actsite_outliers_5perc)

luad_filt_sites_15perc <- sites_with_high_and_low(luad_actsite_outliers_15perc)
luad_actsite_outliers_15perc <- multi_site_adj_GS_pairs(luad_filt_sites_15perc, luad_actsite_outliers_15perc)

ucec_filt_sites_5perc <- sites_with_high_and_low(ucec_actsite_outliers_5perc)
ucec_actsite_outliers_5perc <- multi_site_adj_GS_pairs(ucec_filt_sites_5perc, ucec_actsite_outliers_5perc)

ucec_filt_sites_15perc <- sites_with_high_and_low(ucec_actsite_outliers_15perc)
ucec_actsite_outliers_15perc <- multi_site_adj_GS_pairs(ucec_filt_sites_15perc, ucec_actsite_outliers_15perc)
```

```{r}
# Same procedure for the 2.5% and 10% outlier sets.
brca_filt_sites_2pt5perc <- sites_with_high_and_low(brca_actsite_outliers_2pt5perc)
brca_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(brca_filt_sites_2pt5perc, brca_actsite_outliers_2pt5perc)

brca_filt_sites_10perc <- sites_with_high_and_low(brca_actsite_outliers_10perc)
brca_actsite_outliers_10perc <- multi_site_adj_GS_pairs(brca_filt_sites_10perc, brca_actsite_outliers_10perc)

ccrcc_filt_sites_2pt5perc <- sites_with_high_and_low(ccrcc_actsite_outliers_2pt5perc)
ccrcc_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(ccrcc_filt_sites_2pt5perc, ccrcc_actsite_outliers_2pt5perc)
ccrcc_filt_sites_10perc <- sites_with_high_and_low(ccrcc_actsite_outliers_10perc)
ccrcc_actsite_outliers_10perc <- multi_site_adj_GS_pairs(ccrcc_filt_sites_10perc, ccrcc_actsite_outliers_10perc)

gbm_filt_sites_2pt5perc <- rownames(gbm_actsite_outliers_2pt5perc$outliers)[rowSums(gbm_actsite_outliers_2pt5perc$outliers==1, na.rm = T) > 0 &
rowSums(gbm_actsite_outliers_2pt5perc$outliers== -1, na.rm = T) > 0] +gbm_filt_sites_2pt5perc <- gbm_filt_sites_2pt5perc[order(gbm_filt_sites_2pt5perc)] +gbm_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(gbm_filt_sites_2pt5perc, gbm_actsite_outliers_2pt5perc) +gbm_filt_sites_10perc <- rownames(gbm_actsite_outliers_10perc$outliers)[rowSums(gbm_actsite_outliers_10perc$outliers==1, na.rm = T) > 0 & rowSums(gbm_actsite_outliers_10perc$outliers== -1, na.rm = T) > 0] +gbm_filt_sites_10perc <- gbm_filt_sites_10perc[order(gbm_filt_sites_10perc)] +gbm_actsite_outliers_10perc <- multi_site_adj_GS_pairs(gbm_filt_sites_10perc, gbm_actsite_outliers_10perc) + +hnscc_filt_sites_2pt5perc <- rownames(hnscc_actsite_outliers_2pt5perc$outliers)[rowSums(hnscc_actsite_outliers_2pt5perc$outliers==1, na.rm = T) > 0 & rowSums(hnscc_actsite_outliers_2pt5perc$outliers== -1, na.rm = T) > 0] +hnscc_filt_sites_2pt5perc <- hnscc_filt_sites_2pt5perc[order(hnscc_filt_sites_2pt5perc)] +hnscc_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(hnscc_filt_sites_2pt5perc, hnscc_actsite_outliers_2pt5perc) +hnscc_filt_sites_10perc <- rownames(hnscc_actsite_outliers_10perc$outliers)[rowSums(hnscc_actsite_outliers_10perc$outliers==1, na.rm = T) > 0 & rowSums(hnscc_actsite_outliers_10perc$outliers== -1, na.rm = T) > 0] +hnscc_filt_sites_10perc <- hnscc_filt_sites_10perc[order(hnscc_filt_sites_10perc)] +hnscc_actsite_outliers_10perc <- multi_site_adj_GS_pairs(hnscc_filt_sites_10perc, hnscc_actsite_outliers_10perc) + +lscc_filt_sites_2pt5perc <- rownames(lscc_actsite_outliers_2pt5perc$outliers)[rowSums(lscc_actsite_outliers_2pt5perc$outliers==1, na.rm = T) > 0 & rowSums(lscc_actsite_outliers_2pt5perc$outliers== -1, na.rm = T) > 0] +lscc_filt_sites_2pt5perc <- lscc_filt_sites_2pt5perc[order(lscc_filt_sites_2pt5perc)] +lscc_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(lscc_filt_sites_2pt5perc, lscc_actsite_outliers_2pt5perc) +lscc_filt_sites_10perc <- 
rownames(lscc_actsite_outliers_10perc$outliers)[rowSums(lscc_actsite_outliers_10perc$outliers==1, na.rm = T) > 0 & rowSums(lscc_actsite_outliers_10perc$outliers== -1, na.rm = T) > 0] +lscc_filt_sites_10perc <- lscc_filt_sites_10perc[order(lscc_filt_sites_10perc)] +lscc_actsite_outliers_10perc <- multi_site_adj_GS_pairs(lscc_filt_sites_10perc, lscc_actsite_outliers_10perc) + +luad_filt_sites_2pt5perc <- rownames(luad_actsite_outliers_2pt5perc$outliers)[rowSums(luad_actsite_outliers_2pt5perc$outliers==1, na.rm = T) > 0 & rowSums(luad_actsite_outliers_2pt5perc$outliers== -1, na.rm = T) > 0] +luad_filt_sites_2pt5perc <- luad_filt_sites_2pt5perc[order(luad_filt_sites_2pt5perc)] +luad_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(luad_filt_sites_2pt5perc, luad_actsite_outliers_2pt5perc) +luad_filt_sites_10perc <- rownames(luad_actsite_outliers_10perc$outliers)[rowSums(luad_actsite_outliers_10perc$outliers==1, na.rm = T) > 0 & rowSums(luad_actsite_outliers_10perc$outliers== -1, na.rm = T) > 0] +luad_filt_sites_10perc <- luad_filt_sites_10perc[order(luad_filt_sites_10perc)] +luad_actsite_outliers_10perc <- multi_site_adj_GS_pairs(luad_filt_sites_10perc, luad_actsite_outliers_10perc) + +ucec_filt_sites_2pt5perc <- rownames(ucec_actsite_outliers_2pt5perc$outliers)[rowSums(ucec_actsite_outliers_2pt5perc$outliers==1, na.rm = T) > 0 & rowSums(ucec_actsite_outliers_2pt5perc$outliers== -1, na.rm = T) > 0] +ucec_filt_sites_2pt5perc <- ucec_filt_sites_2pt5perc[order(ucec_filt_sites_2pt5perc)] +ucec_actsite_outliers_2pt5perc <- multi_site_adj_GS_pairs(ucec_filt_sites_2pt5perc, ucec_actsite_outliers_2pt5perc) +ucec_filt_sites_10perc <- rownames(ucec_actsite_outliers_10perc$outliers)[rowSums(ucec_actsite_outliers_10perc$outliers==1, na.rm = T) > 0 & rowSums(ucec_actsite_outliers_10perc$outliers== -1, na.rm = T) > 0] +ucec_filt_sites_10perc <- ucec_filt_sites_10perc[order(ucec_filt_sites_10perc)] +ucec_actsite_outliers_10perc <- multi_site_adj_GS_pairs(ucec_filt_sites_10perc, 
ucec_actsite_outliers_10perc) +``` + + +want to make sure each kinase has both GS+ and GS- pairs in each dataset +```{r} +brca_gsboth_kins_5perc <- intersect(names(brca_actsite_outliers_5perc$GS_pos_pairs)[lengths(brca_actsite_outliers_5perc$GS_pos_pairs) > 0], names(brca_actsite_outliers_5perc$GS_neg_pairs)[lengths(brca_actsite_outliers_5perc$GS_neg_pairs) > 0]) +brca_gsboth_kins_15perc <- intersect(names(brca_actsite_outliers_15perc$GS_pos_pairs)[lengths(brca_actsite_outliers_15perc$GS_pos_pairs) > 0], names(brca_actsite_outliers_15perc$GS_neg_pairs)[lengths(brca_actsite_outliers_15perc$GS_neg_pairs) > 0]) + +ccrcc_gsboth_kins_5perc <- intersect(names(ccrcc_actsite_outliers_5perc$GS_pos_pairs)[lengths(ccrcc_actsite_outliers_5perc$GS_pos_pairs) > 0], names(ccrcc_actsite_outliers_5perc$GS_neg_pairs)[lengths(ccrcc_actsite_outliers_5perc$GS_neg_pairs) > 0]) +ccrcc_gsboth_kins_15perc <- intersect(names(ccrcc_actsite_outliers_15perc$GS_pos_pairs)[lengths(ccrcc_actsite_outliers_15perc$GS_pos_pairs) > 0], names(ccrcc_actsite_outliers_15perc$GS_neg_pairs)[lengths(ccrcc_actsite_outliers_15perc$GS_neg_pairs) > 0]) + +gbm_gsboth_kins_5perc <- intersect(names(gbm_actsite_outliers_5perc$GS_pos_pairs)[lengths(gbm_actsite_outliers_5perc$GS_pos_pairs) > 0], names(gbm_actsite_outliers_5perc$GS_neg_pairs)[lengths(gbm_actsite_outliers_5perc$GS_neg_pairs) > 0]) +gbm_gsboth_kins_15perc <- intersect(names(gbm_actsite_outliers_15perc$GS_pos_pairs)[lengths(gbm_actsite_outliers_15perc$GS_pos_pairs) > 0], names(gbm_actsite_outliers_15perc$GS_neg_pairs)[lengths(gbm_actsite_outliers_15perc$GS_neg_pairs) > 0]) + +hnscc_gsboth_kins_5perc <- intersect(names(hnscc_actsite_outliers_5perc$GS_pos_pairs)[lengths(hnscc_actsite_outliers_5perc$GS_pos_pairs) > 0], names(hnscc_actsite_outliers_5perc$GS_neg_pairs)[lengths(hnscc_actsite_outliers_5perc$GS_neg_pairs) > 0]) +hnscc_gsboth_kins_15perc <- 
intersect(names(hnscc_actsite_outliers_15perc$GS_pos_pairs)[lengths(hnscc_actsite_outliers_15perc$GS_pos_pairs) > 0], names(hnscc_actsite_outliers_15perc$GS_neg_pairs)[lengths(hnscc_actsite_outliers_15perc$GS_neg_pairs) > 0]) + +lscc_gsboth_kins_5perc <- intersect(names(lscc_actsite_outliers_5perc$GS_pos_pairs)[lengths(lscc_actsite_outliers_5perc$GS_pos_pairs) > 0], names(lscc_actsite_outliers_5perc$GS_neg_pairs)[lengths(lscc_actsite_outliers_5perc$GS_neg_pairs) > 0]) +lscc_gsboth_kins_15perc <- intersect(names(lscc_actsite_outliers_15perc$GS_pos_pairs)[lengths(lscc_actsite_outliers_15perc$GS_pos_pairs) > 0], names(lscc_actsite_outliers_15perc$GS_neg_pairs)[lengths(lscc_actsite_outliers_15perc$GS_neg_pairs) > 0]) + +luad_gsboth_kins_5perc <- intersect(names(luad_actsite_outliers_5perc$GS_pos_pairs)[lengths(luad_actsite_outliers_5perc$GS_pos_pairs) > 0], names(luad_actsite_outliers_5perc$GS_neg_pairs)[lengths(luad_actsite_outliers_5perc$GS_neg_pairs) > 0]) +luad_gsboth_kins_15perc <- intersect(names(luad_actsite_outliers_15perc$GS_pos_pairs)[lengths(luad_actsite_outliers_15perc$GS_pos_pairs) > 0], names(luad_actsite_outliers_15perc$GS_neg_pairs)[lengths(luad_actsite_outliers_15perc$GS_neg_pairs) > 0]) + +ucec_gsboth_kins_5perc <- intersect(names(ucec_actsite_outliers_5perc$GS_pos_pairs)[lengths(ucec_actsite_outliers_5perc$GS_pos_pairs) > 0], names(ucec_actsite_outliers_5perc$GS_neg_pairs)[lengths(ucec_actsite_outliers_5perc$GS_neg_pairs) > 0]) +ucec_gsboth_kins_15perc <- intersect(names(ucec_actsite_outliers_15perc$GS_pos_pairs)[lengths(ucec_actsite_outliers_15perc$GS_pos_pairs) > 0], names(ucec_actsite_outliers_15perc$GS_neg_pairs)[lengths(ucec_actsite_outliers_15perc$GS_neg_pairs) > 0]) +``` + +```{r} +brca_gsboth_kins_2pt5perc <- intersect(names(brca_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(brca_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(brca_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(brca_actsite_outliers_2pt5perc$GS_neg_pairs) > 0]) 
+brca_gsboth_kins_10perc <- intersect(names(brca_actsite_outliers_10perc$GS_pos_pairs)[lengths(brca_actsite_outliers_10perc$GS_pos_pairs) > 0], names(brca_actsite_outliers_10perc$GS_neg_pairs)[lengths(brca_actsite_outliers_10perc$GS_neg_pairs) > 0]) # names with a non-empty entry in BOTH GS_pos_pairs and GS_neg_pairs (10% set)
+
+ccrcc_gsboth_kins_2pt5perc <- intersect(names(ccrcc_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(ccrcc_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(ccrcc_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(ccrcc_actsite_outliers_2pt5perc$GS_neg_pairs) > 0]) # same filter, CCRCC 2.5% set
+ccrcc_gsboth_kins_10perc <- intersect(names(ccrcc_actsite_outliers_10perc$GS_pos_pairs)[lengths(ccrcc_actsite_outliers_10perc$GS_pos_pairs) > 0], names(ccrcc_actsite_outliers_10perc$GS_neg_pairs)[lengths(ccrcc_actsite_outliers_10perc$GS_neg_pairs) > 0])
+
+gbm_gsboth_kins_2pt5perc <- intersect(names(gbm_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(gbm_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(gbm_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(gbm_actsite_outliers_2pt5perc$GS_neg_pairs) > 0])
+gbm_gsboth_kins_10perc <- intersect(names(gbm_actsite_outliers_10perc$GS_pos_pairs)[lengths(gbm_actsite_outliers_10perc$GS_pos_pairs) > 0], names(gbm_actsite_outliers_10perc$GS_neg_pairs)[lengths(gbm_actsite_outliers_10perc$GS_neg_pairs) > 0])
+
+hnscc_gsboth_kins_2pt5perc <- intersect(names(hnscc_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(hnscc_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(hnscc_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(hnscc_actsite_outliers_2pt5perc$GS_neg_pairs) > 0])
+hnscc_gsboth_kins_10perc <- intersect(names(hnscc_actsite_outliers_10perc$GS_pos_pairs)[lengths(hnscc_actsite_outliers_10perc$GS_pos_pairs) > 0], names(hnscc_actsite_outliers_10perc$GS_neg_pairs)[lengths(hnscc_actsite_outliers_10perc$GS_neg_pairs) > 0])
+
+lscc_gsboth_kins_2pt5perc <- intersect(names(lscc_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(lscc_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(lscc_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(lscc_actsite_outliers_2pt5perc$GS_neg_pairs) > 0])
+lscc_gsboth_kins_10perc <- intersect(names(lscc_actsite_outliers_10perc$GS_pos_pairs)[lengths(lscc_actsite_outliers_10perc$GS_pos_pairs) > 0], names(lscc_actsite_outliers_10perc$GS_neg_pairs)[lengths(lscc_actsite_outliers_10perc$GS_neg_pairs) > 0])
+
+luad_gsboth_kins_2pt5perc <- intersect(names(luad_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(luad_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(luad_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(luad_actsite_outliers_2pt5perc$GS_neg_pairs) > 0])
+luad_gsboth_kins_10perc <- intersect(names(luad_actsite_outliers_10perc$GS_pos_pairs)[lengths(luad_actsite_outliers_10perc$GS_pos_pairs) > 0], names(luad_actsite_outliers_10perc$GS_neg_pairs)[lengths(luad_actsite_outliers_10perc$GS_neg_pairs) > 0])
+
+ucec_gsboth_kins_2pt5perc <- intersect(names(ucec_actsite_outliers_2pt5perc$GS_pos_pairs)[lengths(ucec_actsite_outliers_2pt5perc$GS_pos_pairs) > 0], names(ucec_actsite_outliers_2pt5perc$GS_neg_pairs)[lengths(ucec_actsite_outliers_2pt5perc$GS_neg_pairs) > 0])
+ucec_gsboth_kins_10perc <- intersect(names(ucec_actsite_outliers_10perc$GS_pos_pairs)[lengths(ucec_actsite_outliers_10perc$GS_pos_pairs) > 0], names(ucec_actsite_outliers_10perc$GS_neg_pairs)[lengths(ucec_actsite_outliers_10perc$GS_neg_pairs) > 0])
+```
+
+```{r}
+combinedGS_5per_act <- list(brca_gsboth_kins_5perc, ccrcc_gsboth_kins_5perc, gbm_gsboth_kins_5perc, hnscc_gsboth_kins_5perc, lscc_gsboth_kins_5perc, luad_gsboth_kins_5perc, ucec_gsboth_kins_5perc) # one name vector per cancer type, 5% set; order must match the labels assigned on the next line
+names(combinedGS_5per_act) <- c("BRCA", "CCRCC", "GBM", "HNSCC", "LSCC", "LUAD", "UCEC")
+saveRDS(combinedGS_5per_act, "GSsets/protein_5percent.Rds") # NOTE(review): file is named "protein_*" but the lists derive from *_actsite_outliers_* objects - confirm the intended file name
+
+combinedGS_2pt5per_act <- list(brca_gsboth_kins_2pt5perc, ccrcc_gsboth_kins_2pt5perc, gbm_gsboth_kins_2pt5perc, hnscc_gsboth_kins_2pt5perc, lscc_gsboth_kins_2pt5perc, luad_gsboth_kins_2pt5perc, ucec_gsboth_kins_2pt5perc) # same layout for the 2.5% set
+names(combinedGS_2pt5per_act) <- c("BRCA", "CCRCC", "GBM", "HNSCC", "LSCC", "LUAD", "UCEC")
+saveRDS(combinedGS_2pt5per_act, "GSsets/protein_2pt5percent.Rds")
+
+combinedGS_10per_act <- list(brca_gsboth_kins_10perc, ccrcc_gsboth_kins_10perc, gbm_gsboth_kins_10perc, hnscc_gsboth_kins_10perc, lscc_gsboth_kins_10perc, luad_gsboth_kins_10perc, ucec_gsboth_kins_10perc)
+names(combinedGS_10per_act) <- c("BRCA", "CCRCC", "GBM", "HNSCC", "LSCC", "LUAD", "UCEC")
+saveRDS(combinedGS_10per_act, "GSsets/protein_10percent.Rds")
+
+combinedGS_15per_act <- list(brca_gsboth_kins_15perc, ccrcc_gsboth_kins_15perc, gbm_gsboth_kins_15perc, hnscc_gsboth_kins_15perc, lscc_gsboth_kins_15perc, luad_gsboth_kins_15perc, ucec_gsboth_kins_15perc)
+names(combinedGS_15per_act) <- c("BRCA", "CCRCC", "GBM", "HNSCC", "LSCC", "LUAD", "UCEC")
+saveRDS(combinedGS_15per_act, "GSsets/protein_15percent.Rds")
+```
+
+```{r}
+save.image("KIA_benchmarking_defGSset_v7_latest_ckpt1.rda")
+```
+```{r}
+load("KIA_benchmarking_defGSset_v7_latest_ckpt1.rda")
+```
+
+For the rest: extract code to generate the summary tables and delete rest (TBD)
+
+RESUME HERE:
+
+make figure 1 table: numbers of tumors, kinases, GS pairs for each cancer type
+
+1%, 5%, 15% thresholds
+```{r}
+fig_1_tab <- matrix(NA, nrow = 10, ncol = 10, dimnames = list(c("BRCA","CCRCC","COAD","GBM","HNSCC","LSCC","LUAD","OV","PDAC","UCEC"), c("No. Tumors", "Kinases (1% threshold)", "GS+ Kinase-Tumor Pairs (1% threshold)", "GS- Kinase-Tumor Pairs (1% threshold)", "Kinases (5% threshold)", "GS+ Kinase-Tumor Pairs (5% threshold)", "GS- Kinase-Tumor Pairs (5% threshold)", "Kinases (15% threshold)", "GS+ Kinase-Tumor Pairs (15% threshold)", "GS- Kinase-Tumor Pairs (15% threshold)"))) # summary table: one row per cancer type (row index follows this dimnames order), one column per count
+fig_1_tab[1, 1] <- ncol(brca_phos_kins1)
+fig_1_tab[2, 1] <- ncol(ccrcc_phos_kins1)
+fig_1_tab[3, 1] <- ncol(coad_phos_kins1)
+fig_1_tab[4, 1] <- ncol(gbm_phos_kins1)
+fig_1_tab[5, 1] <- ncol(hnscc_phos_kins1)
+fig_1_tab[6, 1] <- ncol(lscc_phos_kins1)
+fig_1_tab[7, 1] <- ncol(luad_phos_kins1)
+fig_1_tab[8, 1] <- ncol(ov_phos_kins1)
+fig_1_tab[9, 1] <- ncol(pdac_phos_kins1)
+fig_1_tab[10, 1] <- ncol(ucec_phos_kins1)
+
+fig_1_tab[1, 2] <- length(brca_gsboth_kins_1per)
+fig_1_tab[2, 2] <- length(ccrcc_gsboth_kins_1per)
+fig_1_tab[3, 2] <- length(coad_gsboth_kins_1per)
+fig_1_tab[4, 2] <- length(gbm_gsboth_kins_1per)
+fig_1_tab[5, 2] <- length(hnscc_gsboth_kins_1per)
+fig_1_tab[6, 2] <- length(lscc_gsboth_kins_1per)
+fig_1_tab[7, 2] <- length(luad_gsboth_kins_1per)
+fig_1_tab[8, 2] <- length(ov_gsboth_kins_1per)
+fig_1_tab[9, 2] <- length(pdac_gsboth_kins_1per)
+fig_1_tab[10,2] <- length(ucec_gsboth_kins_1per)
+
+fig_1_tab[1, 3] <- sum(lengths(brca_extract_gs_filt_1per2$GS_pos_pairs[brca_gsboth_kins_1per]))
+fig_1_tab[2, 3] <- sum(lengths(ccrcc_extract_gs_filt_1per2$GS_pos_pairs[ccrcc_gsboth_kins_1per])) # row 2 is CCRCC per the dimnames order
+fig_1_tab[3, 3] <- sum(lengths(coad_extract_gs_filt_1per2$GS_pos_pairs[coad_gsboth_kins_1per])) # row 3 is COAD per the dimnames order
+fig_1_tab[4, 3] <- sum(lengths(gbm_extract_gs_filt_1per2$GS_pos_pairs[gbm_gsboth_kins_1per]))
+fig_1_tab[5, 3] <- sum(lengths(hnscc_extract_gs_filt_1per2$GS_pos_pairs[hnscc_gsboth_kins_1per]))
+fig_1_tab[6, 3] <- sum(lengths(lscc_extract_gs_filt_1per2$GS_pos_pairs[lscc_gsboth_kins_1per]))
+fig_1_tab[7, 3] <- sum(lengths(luad_extract_gs_filt_1per2$GS_pos_pairs[luad_gsboth_kins_1per]))
+fig_1_tab[8, 3] <- sum(lengths(ov_extract_gs_filt_1per2$GS_pos_pairs[ov_gsboth_kins_1per]))
+fig_1_tab[9, 3] <- sum(lengths(pdac_extract_gs_filt_1per2$GS_pos_pairs[pdac_gsboth_kins_1per]))
+fig_1_tab[10,3] <- sum(lengths(ucec_extract_gs_filt_1per2$GS_pos_pairs[ucec_gsboth_kins_1per]))
+
+fig_1_tab[1, 4] <- sum(lengths(brca_extract_gs_filt_1per2$GS_neg_pairs[brca_gsboth_kins_1per]))
+fig_1_tab[2, 4] <- sum(lengths(ccrcc_extract_gs_filt_1per2$GS_neg_pairs[ccrcc_gsboth_kins_1per])) # row 2 is CCRCC
+fig_1_tab[3, 4] <- sum(lengths(coad_extract_gs_filt_1per2$GS_neg_pairs[coad_gsboth_kins_1per])) # row 3 is COAD; every row must be filled or colSums() below yields NA for this column
+fig_1_tab[4, 4] <- sum(lengths(gbm_extract_gs_filt_1per2$GS_neg_pairs[gbm_gsboth_kins_1per]))
+fig_1_tab[5, 4] <- sum(lengths(hnscc_extract_gs_filt_1per2$GS_neg_pairs[hnscc_gsboth_kins_1per]))
+fig_1_tab[6, 4] <- sum(lengths(lscc_extract_gs_filt_1per2$GS_neg_pairs[lscc_gsboth_kins_1per]))
+fig_1_tab[7, 4] <- sum(lengths(luad_extract_gs_filt_1per2$GS_neg_pairs[luad_gsboth_kins_1per]))
+fig_1_tab[8, 4] <- sum(lengths(ov_extract_gs_filt_1per2$GS_neg_pairs[ov_gsboth_kins_1per]))
+fig_1_tab[9, 4] <- sum(lengths(pdac_extract_gs_filt_1per2$GS_neg_pairs[pdac_gsboth_kins_1per]))
+fig_1_tab[10,4] <- sum(lengths(ucec_extract_gs_filt_1per2$GS_neg_pairs[ucec_gsboth_kins_1per]))
+
+fig_1_tab[1, 5] <- length(brca_gsboth_kins)
+fig_1_tab[2, 5] <- length(ccrcc_gsboth_kins)
+fig_1_tab[3, 5] <- length(coad_gsboth_kins)
+fig_1_tab[4, 5] <- length(gbm_gsboth_kins)
+fig_1_tab[5, 5] <- length(hnscc_gsboth_kins)
+fig_1_tab[6, 5] <- length(lscc_gsboth_kins)
+fig_1_tab[7, 5] <- length(luad_gsboth_kins)
+fig_1_tab[8, 5] <- length(ov_gsboth_kins)
+fig_1_tab[9, 5] <- length(pdac_gsboth_kins)
+fig_1_tab[10,5] <- length(ucec_gsboth_kins)
+
+fig_1_tab[1, 6] <- sum(lengths(brca_GS_pos))
+fig_1_tab[2, 6] <- sum(lengths(ccrcc_GS_pos))
+fig_1_tab[3, 6] <- sum(lengths(coad_GS_pos))
+fig_1_tab[4, 6] <- sum(lengths(gbm_GS_pos))
+fig_1_tab[5, 6] <- sum(lengths(hnscc_GS_pos))
+fig_1_tab[6, 6] <- sum(lengths(lscc_GS_pos))
+fig_1_tab[7, 6] <- sum(lengths(luad_GS_pos))
+fig_1_tab[8, 6] <- sum(lengths(ov_GS_pos))
+fig_1_tab[9, 6] <- sum(lengths(pdac_GS_pos))
+fig_1_tab[10,6] <- sum(lengths(ucec_GS_pos))
+
+fig_1_tab[1, 7] <- sum(lengths(brca_GS_neg))
+fig_1_tab[2, 7] <- sum(lengths(ccrcc_GS_neg))
+fig_1_tab[3, 7] <- sum(lengths(coad_GS_neg))
+fig_1_tab[4, 7] <- sum(lengths(gbm_GS_neg))
+fig_1_tab[5, 7] <- sum(lengths(hnscc_GS_neg))
+fig_1_tab[6, 7] <- sum(lengths(lscc_GS_neg))
+fig_1_tab[7, 7] <- sum(lengths(luad_GS_neg))
+fig_1_tab[8, 7] <- sum(lengths(ov_GS_neg))
+fig_1_tab[9, 7] <- sum(lengths(pdac_GS_neg))
+fig_1_tab[10,7] <- sum(lengths(ucec_GS_neg))
+
+fig_1_tab[1, 8] <- length(brca_gsboth_kins_15per)
+fig_1_tab[2, 8] <- length(ccrcc_gsboth_kins_15per)
+fig_1_tab[3, 8] <- length(coad_gsboth_kins_15per)
+fig_1_tab[4, 8] <- length(gbm_gsboth_kins_15per)
+fig_1_tab[5, 8] <- length(hnscc_gsboth_kins_15per)
+fig_1_tab[6, 8] <- length(lscc_gsboth_kins_15per)
+fig_1_tab[7, 8] <- length(luad_gsboth_kins_15per)
+fig_1_tab[8, 8] <- length(ov_gsboth_kins_15per)
+fig_1_tab[9, 8] <- length(pdac_gsboth_kins_15per)
+fig_1_tab[10,8] <- length(ucec_gsboth_kins_15per)
+
+fig_1_tab[1, 9] <- sum(lengths(brca_GS_pos_15per))
+fig_1_tab[2, 9] <- sum(lengths(ccrcc_GS_pos_15per))
+fig_1_tab[3, 9] <- sum(lengths(coad_GS_pos_15per))
+fig_1_tab[4, 9] <- sum(lengths(gbm_GS_pos_15per))
+fig_1_tab[5, 9] <- sum(lengths(hnscc_GS_pos_15per))
+fig_1_tab[6, 9] <- sum(lengths(lscc_GS_pos_15per))
+fig_1_tab[7, 9] <- sum(lengths(luad_GS_pos_15per))
+fig_1_tab[8, 9] <- sum(lengths(ov_GS_pos_15per))
+fig_1_tab[9, 9] <- sum(lengths(pdac_GS_pos_15per))
+fig_1_tab[10,9] <- sum(lengths(ucec_GS_pos_15per))
+
+fig_1_tab[1, 10] <- sum(lengths(brca_GS_neg_15per))
+fig_1_tab[2, 10] <- sum(lengths(ccrcc_GS_neg_15per))
+fig_1_tab[3, 10] <- sum(lengths(coad_GS_neg_15per))
+fig_1_tab[4, 10] <- sum(lengths(gbm_GS_neg_15per))
+fig_1_tab[5, 10] <- sum(lengths(hnscc_GS_neg_15per))
+fig_1_tab[6, 10] <- sum(lengths(lscc_GS_neg_15per))
+fig_1_tab[7, 10] <- sum(lengths(luad_GS_neg_15per))
+fig_1_tab[8, 10] <- sum(lengths(ov_GS_neg_15per))
+fig_1_tab[9, 10] <- sum(lengths(pdac_GS_neg_15per))
+fig_1_tab[10,10] <- sum(lengths(ucec_GS_neg_15per))
+
+fig_1_tab <- rbind(fig_1_tab, colSums(fig_1_tab)) # append a totals row; the kinase-count totals (cols 2, 5, 8) are overwritten below with union sizes
+rownames(fig_1_tab)[nrow(fig_1_tab)] <- "Total"
+all_kins1 <- unique(c(brca_gsboth_kins_1per, ccrcc_gsboth_kins_1per, coad_gsboth_kins_1per, gbm_gsboth_kins_1per, hnscc_gsboth_kins_1per, lscc_gsboth_kins_1per, luad_gsboth_kins_1per, ov_gsboth_kins_1per, pdac_gsboth_kins_1per, ucec_gsboth_kins_1per))
+fig_1_tab[11, 2] <- length(all_kins1)
+all_kins <- unique(c(brca_gsboth_kins, ccrcc_gsboth_kins, coad_gsboth_kins, gbm_gsboth_kins, hnscc_gsboth_kins, lscc_gsboth_kins, luad_gsboth_kins, ov_gsboth_kins, pdac_gsboth_kins, ucec_gsboth_kins))
+fig_1_tab[11, 5] <- length(all_kins)
+all_kins10 <- unique(c(brca_gsboth_kins_15per, ccrcc_gsboth_kins_15per, coad_gsboth_kins_15per, gbm_gsboth_kins_15per, hnscc_gsboth_kins_15per, lscc_gsboth_kins_15per, luad_gsboth_kins_15per, ov_gsboth_kins_15per, pdac_gsboth_kins_15per, ucec_gsboth_kins_15per)) # NOTE: despite the name, this is the union for the 15% sets
+fig_1_tab[11, 8] <- length(all_kins10)
+
+fig_1_tab <- as.data.frame(fig_1_tab)
+
+fig_1_tab$`Kinase List (1%)` <- NA
+fig_1_tab[1, 11] <- paste0(brca_gsboth_kins_1per, collapse = ",")
+fig_1_tab[2, 11] <- paste0(ccrcc_gsboth_kins_1per, collapse = ",")
+fig_1_tab[3, 11] <- paste0(coad_gsboth_kins_1per, collapse = ",")
+fig_1_tab[4, 11] <- paste0(gbm_gsboth_kins_1per, collapse = ",")
+fig_1_tab[5, 11] <- paste0(hnscc_gsboth_kins_1per, collapse = ",")
+fig_1_tab[6, 11] <- paste0(lscc_gsboth_kins_1per, collapse = ",")
+fig_1_tab[7, 11] <- paste0(luad_gsboth_kins_1per, collapse = ",")
+fig_1_tab[8, 11] <- paste0(ov_gsboth_kins_1per, collapse = ",")
+fig_1_tab[9, 11] <- paste0(pdac_gsboth_kins_1per, collapse = ",")
+fig_1_tab[10, 11] <- paste0(ucec_gsboth_kins_1per, collapse = ",")
+fig_1_tab[11, 11] <- paste0(all_kins1, collapse = ",") # Total row of the 1% list uses the 1% union, matching fig_1_tab[11, 2]
+
+fig_1_tab$`Kinase List (5%)` <- NA
+fig_1_tab[1, 12] <- paste0(brca_gsboth_kins, collapse = ",")
+fig_1_tab[2, 12] <- paste0(ccrcc_gsboth_kins, collapse = ",")
+fig_1_tab[3, 12] <- paste0(coad_gsboth_kins, collapse = ",")
+fig_1_tab[4, 12] <- paste0(gbm_gsboth_kins, collapse = ",")
+fig_1_tab[5, 12] <- paste0(hnscc_gsboth_kins, collapse = ",")
+fig_1_tab[6, 12] <- paste0(lscc_gsboth_kins, collapse = ",")
+fig_1_tab[7, 12] <- paste0(luad_gsboth_kins, collapse = ",")
+fig_1_tab[8, 12] <- paste0(ov_gsboth_kins, collapse = ",")
+fig_1_tab[9, 12] <- paste0(pdac_gsboth_kins, collapse = ",")
+fig_1_tab[10, 12] <- paste0(ucec_gsboth_kins, collapse = ",")
+fig_1_tab[11, 12] <- paste0(all_kins, collapse = ",")
+
+fig_1_tab$`Kinase List (15%)` <- NA # label matches the *_15per lists stored in this column
+fig_1_tab[1, 13] <- paste0(brca_gsboth_kins_15per, collapse = ",")
+fig_1_tab[2, 13] <- paste0(ccrcc_gsboth_kins_15per, collapse = ",")
+fig_1_tab[3, 13] <- paste0(coad_gsboth_kins_15per, collapse = ",")
+fig_1_tab[4, 13] <- paste0(gbm_gsboth_kins_15per, collapse = ",")
+fig_1_tab[5, 13] <- paste0(hnscc_gsboth_kins_15per, collapse = ",")
+fig_1_tab[6, 13] <- paste0(lscc_gsboth_kins_15per, collapse = ",")
+fig_1_tab[7, 13] <- paste0(luad_gsboth_kins_15per, collapse = ",")
+fig_1_tab[8, 13] <- paste0(ov_gsboth_kins_15per, collapse = ",")
+fig_1_tab[9, 13] <- paste0(pdac_gsboth_kins_15per, collapse = ",")
+fig_1_tab[10, 13] <- paste0(ucec_gsboth_kins_15per, collapse = ",")
+fig_1_tab[11, 13] <- paste0(all_kins10, collapse = ",")
+```
+
+alt version: 2%, 10%, 20% thresholds
+```{r}
+fig_1_tab <- matrix(NA, nrow = 10, ncol = 10, dimnames = list(c("BRCA","CCRCC","COAD","GBM","HNSCC","LSCC","LUAD","OV","PDAC","UCEC"), c("Tumors", "Kinases (2% threshold)", "GS+ Kinase-Tumor Pairs (2% threshold)", "GS- Kinase-Tumor Pairs (2% threshold)", "Kinases (10% threshold)", "GS+ Kinase-Tumor Pairs (10% threshold)", "GS- Kinase-Tumor Pairs (10% threshold)", "Kinases (20% threshold)", "GS+ Kinase-Tumor Pairs (20% threshold)", "GS- Kinase-Tumor Pairs (20% threshold)")))
+fig_1_tab[1, 1] <- ncol(brca_phos_kins1)
+fig_1_tab[2, 1] <- ncol(ccrcc_phos_kins1)
+fig_1_tab[3, 1] <- ncol(coad_phos_kins1)
+fig_1_tab[4, 1] <- ncol(gbm_phos_kins1)
+fig_1_tab[5, 1] <- ncol(hnscc_phos_kins1)
+fig_1_tab[6, 1] <- ncol(lscc_phos_kins1)
+fig_1_tab[7, 1] <- ncol(luad_phos_kins1)
+fig_1_tab[8, 1] <- ncol(ov_phos_kins1)
+fig_1_tab[9, 1] <- ncol(pdac_phos_kins1)
+fig_1_tab[10, 1] <- ncol(ucec_phos_kins1)
+
+fig_1_tab[1, 2] <- length(brca_gsboth_kins_2per)
+fig_1_tab[2, 2] <- length(ccrcc_gsboth_kins_2per)
+fig_1_tab[3, 2] <- length(coad_gsboth_kins_2per)
+fig_1_tab[4, 2] <- length(gbm_gsboth_kins_2per)
+fig_1_tab[5, 2] <- length(hnscc_gsboth_kins_2per)
+fig_1_tab[6, 2] <- length(lscc_gsboth_kins_2per)
+fig_1_tab[7, 2] <- length(luad_gsboth_kins_2per)
+fig_1_tab[8, 2] <- length(ov_gsboth_kins_2per)
+fig_1_tab[9, 2] <- length(pdac_gsboth_kins_2per)
+fig_1_tab[10,2] <- length(ucec_gsboth_kins_2per)
+
+fig_1_tab[1, 3] <- sum(lengths(brca_GS_pos_2per))
+fig_1_tab[2, 3] <- sum(lengths(ccrcc_GS_pos_2per))
+fig_1_tab[3, 3] <- sum(lengths(coad_GS_pos_2per))
+fig_1_tab[4, 3] <- sum(lengths(gbm_GS_pos_2per))
+fig_1_tab[5, 3] <- sum(lengths(hnscc_GS_pos_2per))
+fig_1_tab[6, 3] <- sum(lengths(lscc_GS_pos_2per))
+fig_1_tab[7, 3] <- sum(lengths(luad_GS_pos_2per))
+fig_1_tab[8, 3] <- sum(lengths(ov_GS_pos_2per))
+fig_1_tab[9, 3] <- sum(lengths(pdac_GS_pos_2per))
+fig_1_tab[10,3] <- sum(lengths(ucec_GS_pos_2per))
+
+fig_1_tab[1, 4] <- sum(lengths(brca_GS_neg_2per))
+fig_1_tab[2, 4] <- sum(lengths(ccrcc_GS_neg_2per))
+fig_1_tab[3, 4] <- sum(lengths(coad_GS_neg_2per))
+fig_1_tab[4, 4] <- sum(lengths(gbm_GS_neg_2per))
+fig_1_tab[5, 4] <- sum(lengths(hnscc_GS_neg_2per))
+fig_1_tab[6, 4] <- sum(lengths(lscc_GS_neg_2per))
+fig_1_tab[7, 4] <- sum(lengths(luad_GS_neg_2per))
+fig_1_tab[8, 4] <- sum(lengths(ov_GS_neg_2per))
+fig_1_tab[9, 4] <- sum(lengths(pdac_GS_neg_2per))
+fig_1_tab[10,4] <- sum(lengths(ucec_GS_neg_2per))
+
+fig_1_tab[1, 5] <- length(brca_gsboth_kins_10per)
+fig_1_tab[2, 5] <- length(ccrcc_gsboth_kins_10per)
+fig_1_tab[3, 5] <- length(coad_gsboth_kins_10per)
+fig_1_tab[4, 5] <- length(gbm_gsboth_kins_10per)
+fig_1_tab[5, 5] <- length(hnscc_gsboth_kins_10per)
+fig_1_tab[6, 5] <- length(lscc_gsboth_kins_10per)
+fig_1_tab[7, 5] <- length(luad_gsboth_kins_10per)
+fig_1_tab[8, 5] <- length(ov_gsboth_kins_10per)
+fig_1_tab[9, 5] <- length(pdac_gsboth_kins_10per)
+fig_1_tab[10,5] <- length(ucec_gsboth_kins_10per)
+
+fig_1_tab[1, 6] <- sum(lengths(brca_GS_pos_10per))
+fig_1_tab[2, 6] <- sum(lengths(ccrcc_GS_pos_10per))
+fig_1_tab[3, 6] <- sum(lengths(coad_GS_pos_10per))
+fig_1_tab[4, 6] <- sum(lengths(gbm_GS_pos_10per))
+fig_1_tab[5, 6] <- sum(lengths(hnscc_GS_pos_10per))
+fig_1_tab[6, 6] <- sum(lengths(lscc_GS_pos_10per))
+fig_1_tab[7, 6] <- sum(lengths(luad_GS_pos_10per))
+fig_1_tab[8, 6] <- sum(lengths(ov_GS_pos_10per))
+fig_1_tab[9, 6] <- sum(lengths(pdac_GS_pos_10per))
+fig_1_tab[10,6] <- sum(lengths(ucec_GS_pos_10per))
+
+fig_1_tab[1, 7] <- sum(lengths(brca_GS_neg_10per))
+fig_1_tab[2, 7] <- sum(lengths(ccrcc_GS_neg_10per))
+fig_1_tab[3, 7] <- sum(lengths(coad_GS_neg_10per))
+fig_1_tab[4, 7] <- sum(lengths(gbm_GS_neg_10per))
+fig_1_tab[5, 7] <- sum(lengths(hnscc_GS_neg_10per))
+fig_1_tab[6, 7] <- sum(lengths(lscc_GS_neg_10per))
+fig_1_tab[7, 7] <- sum(lengths(luad_GS_neg_10per))
+fig_1_tab[8, 7] <- sum(lengths(ov_GS_neg_10per))
+fig_1_tab[9, 7] <- sum(lengths(pdac_GS_neg_10per))
+fig_1_tab[10,7] <- sum(lengths(ucec_GS_neg_10per))
+
+fig_1_tab[1, 8] <- length(brca_gsboth_kins_20per)
+fig_1_tab[2, 8] <- length(ccrcc_gsboth_kins_20per)
+fig_1_tab[3, 8] <- length(coad_gsboth_kins_20per)
+fig_1_tab[4, 8] <- length(gbm_gsboth_kins_20per)
+fig_1_tab[5, 8] <- length(hnscc_gsboth_kins_20per)
+fig_1_tab[6, 8] <- length(lscc_gsboth_kins_20per)
+fig_1_tab[7, 8] <- length(luad_gsboth_kins_20per)
+fig_1_tab[8, 8] <- length(ov_gsboth_kins_20per)
+fig_1_tab[9, 8] <- length(pdac_gsboth_kins_20per)
+fig_1_tab[10,8] <- length(ucec_gsboth_kins_20per)
+
+fig_1_tab[1, 9] <- sum(lengths(brca_GS_pos_20per))
+fig_1_tab[2, 9] <- sum(lengths(ccrcc_GS_pos_20per))
+fig_1_tab[3, 9] <- sum(lengths(coad_GS_pos_20per))
+fig_1_tab[4, 9] <- sum(lengths(gbm_GS_pos_20per))
+fig_1_tab[5, 9] <- sum(lengths(hnscc_GS_pos_20per))
+fig_1_tab[6, 9] <- sum(lengths(lscc_GS_pos_20per))
+fig_1_tab[7, 9] <- sum(lengths(luad_GS_pos_20per))
+fig_1_tab[8, 9] <- sum(lengths(ov_GS_pos_20per))
+fig_1_tab[9, 9] <- sum(lengths(pdac_GS_pos_20per))
+fig_1_tab[10,9] <- sum(lengths(ucec_GS_pos_20per))
+
+fig_1_tab[1, 10] <- sum(lengths(brca_GS_neg_20per))
+fig_1_tab[2, 10] <- sum(lengths(ccrcc_GS_neg_20per))
+fig_1_tab[3, 10] <- sum(lengths(coad_GS_neg_20per))
+fig_1_tab[4, 10] <- sum(lengths(gbm_GS_neg_20per))
+fig_1_tab[5, 10] <- sum(lengths(hnscc_GS_neg_20per))
+fig_1_tab[6, 10] <- sum(lengths(lscc_GS_neg_20per))
+fig_1_tab[7, 10] <- sum(lengths(luad_GS_neg_20per))
+fig_1_tab[8, 10] <- sum(lengths(ov_GS_neg_20per))
+fig_1_tab[9, 10] <- sum(lengths(pdac_GS_neg_20per))
+fig_1_tab[10,10] <- sum(lengths(ucec_GS_neg_20per))
+
+fig_1_tab <- rbind(fig_1_tab, colSums(fig_1_tab)) # append a totals row; the kinase-count totals (cols 2, 5, 8) are overwritten below with union sizes
+rownames(fig_1_tab)[nrow(fig_1_tab)] <- "Total"
+all_kins2 <- unique(c(brca_gsboth_kins_2per, ccrcc_gsboth_kins_2per, coad_gsboth_kins_2per, gbm_gsboth_kins_2per, hnscc_gsboth_kins_2per, lscc_gsboth_kins_2per, luad_gsboth_kins_2per, ov_gsboth_kins_2per, pdac_gsboth_kins_2per, ucec_gsboth_kins_2per))
+fig_1_tab[11, 2] <- length(all_kins2)
+all_kins10 <- unique(c(brca_gsboth_kins_10per, ccrcc_gsboth_kins_10per, coad_gsboth_kins_10per, gbm_gsboth_kins_10per, hnscc_gsboth_kins_10per, lscc_gsboth_kins_10per, luad_gsboth_kins_10per, ov_gsboth_kins_10per, pdac_gsboth_kins_10per, ucec_gsboth_kins_10per))
+fig_1_tab[11, 5] <- length(all_kins10)
+all_kins20 <- unique(c(brca_gsboth_kins_20per, ccrcc_gsboth_kins_20per, coad_gsboth_kins_20per, gbm_gsboth_kins_20per, hnscc_gsboth_kins_20per, lscc_gsboth_kins_20per, luad_gsboth_kins_20per, ov_gsboth_kins_20per, pdac_gsboth_kins_20per, ucec_gsboth_kins_20per))
+fig_1_tab[11, 8] <- length(all_kins20)
+
+fig_1_tab <- as.data.frame(fig_1_tab)
+
+fig_1_tab$`Kinase List (2%)` <- NA # label matches the *_2per lists stored in this column
+fig_1_tab[1, 11] <- paste0(brca_gsboth_kins_2per, collapse = ",")
+fig_1_tab[2, 11] <- paste0(ccrcc_gsboth_kins_2per, collapse = ",")
+fig_1_tab[3, 11] <- paste0(coad_gsboth_kins_2per, collapse = ",")
+fig_1_tab[4, 11] <- paste0(gbm_gsboth_kins_2per, collapse = ",")
+fig_1_tab[5, 11] <- paste0(hnscc_gsboth_kins_2per, collapse = ",")
+fig_1_tab[6, 11] <- paste0(lscc_gsboth_kins_2per, collapse = ",")
+fig_1_tab[7, 11] <- paste0(luad_gsboth_kins_2per, collapse = ",")
+fig_1_tab[8, 11] <- paste0(ov_gsboth_kins_2per, collapse = ",")
+fig_1_tab[9, 11] <- paste0(pdac_gsboth_kins_2per, collapse = ",")
+fig_1_tab[10, 11] <- paste0(ucec_gsboth_kins_2per, collapse = ",")
+fig_1_tab[11, 11] <- paste0(all_kins2, collapse = ",") # Total row of the 2% list uses the 2% union, matching fig_1_tab[11, 2]
+
+fig_1_tab$`Kinase List (10%)` <- NA
+fig_1_tab[1, 12] <- paste0(brca_gsboth_kins_10per, collapse = ",")
+fig_1_tab[2, 12] <- paste0(ccrcc_gsboth_kins_10per, collapse = ",")
+fig_1_tab[3, 12] <- paste0(coad_gsboth_kins_10per, collapse = ",")
+fig_1_tab[4, 12] <- paste0(gbm_gsboth_kins_10per, collapse = ",")
+fig_1_tab[5, 12] <- paste0(hnscc_gsboth_kins_10per, collapse = ",")
+fig_1_tab[6, 12] <- paste0(lscc_gsboth_kins_10per, collapse = ",")
+fig_1_tab[7, 12] <- paste0(luad_gsboth_kins_10per, collapse = ",")
+fig_1_tab[8, 12] <- paste0(ov_gsboth_kins_10per, collapse = ",")
+fig_1_tab[9, 12] <- paste0(pdac_gsboth_kins_10per, collapse = ",")
+fig_1_tab[10, 12] <- paste0(ucec_gsboth_kins_10per, collapse = ",")
+fig_1_tab[11, 12] <- paste0(all_kins10, collapse = ",")
+
+fig_1_tab$`Kinase List (20%)` <- NA
+fig_1_tab[1, 13] <- paste0(brca_gsboth_kins_20per, collapse = ",")
+fig_1_tab[2, 13] <- paste0(ccrcc_gsboth_kins_20per, collapse = ",")
+fig_1_tab[3, 13] <- paste0(coad_gsboth_kins_20per, collapse = ",")
+fig_1_tab[4, 13] <- paste0(gbm_gsboth_kins_20per, collapse = ",")
+fig_1_tab[5, 13] <- paste0(hnscc_gsboth_kins_20per, collapse = ",")
+fig_1_tab[6, 13] <- paste0(lscc_gsboth_kins_20per, collapse = ",")
+fig_1_tab[7, 13] <- paste0(luad_gsboth_kins_20per, collapse = ",")
+fig_1_tab[8, 13] <- paste0(ov_gsboth_kins_20per, collapse = ",")
+fig_1_tab[9, 13] <- paste0(pdac_gsboth_kins_20per, collapse = ",")
+fig_1_tab[10, 13] <- paste0(ucec_gsboth_kins_20per, collapse = ",")
+fig_1_tab[11, 13] <- paste0(all_kins20, collapse = ",")
+```
+
+alternate version without COAD, OV, and PDAC
+make figure 1 table: numbers of tumors, kinases, GS pairs for each cancer type
+```{r}
+fig_1_tab <- matrix(NA, nrow = 7, ncol = 7, dimnames = list(c("BRCA","CCRCC","GBM","HNSCC","LSCC","LUAD","UCEC"), c("Tumors", "Kinases (5% threshold)", "GS+ Kinase-Tumor Pairs (5% threshold)", "GS- Kinase-Tumor Pairs (5% threshold)", "Kinases (20% threshold)", "GS+ Kinase-Tumor Pairs (20% threshold)", "GS- Kinase-Tumor Pairs (20% threshold)"))) # 7-cancer table: row index follows this dimnames order (COAD/OV/PDAC rows are commented out below)
+fig_1_tab[1, 1] <- ncol(brca_phos_kins1)
+fig_1_tab[2, 1] <- ncol(ccrcc_phos_kins1)
+#fig_1_tab[3, 1] <- ncol(coad_phos_kins1)
+fig_1_tab[3, 1] <- ncol(gbm_phos_kins1)
+fig_1_tab[4, 1] <- ncol(hnscc_phos_kins1)
+fig_1_tab[5, 1] <- ncol(lscc_phos_kins1)
+fig_1_tab[6, 1] <- ncol(luad_phos_kins1)
+#fig_1_tab[8, 1] <- ncol(ov_phos_kins1)
+#fig_1_tab[9, 1] <- ncol(pdac_phos_kins1)
+fig_1_tab[7, 1] <- ncol(ucec_phos_kins1)
+
+fig_1_tab[1, 2] <- length(brca_gsboth_kins)
+fig_1_tab[2, 2] <- length(ccrcc_gsboth_kins)
+#fig_1_tab[3, 2] <- length(coad_gsboth_kins)
+fig_1_tab[3, 2] <- length(gbm_gsboth_kins)
+fig_1_tab[4, 2] <- length(hnscc_gsboth_kins)
+fig_1_tab[5, 2] <- length(lscc_gsboth_kins)
+fig_1_tab[6, 2] <- length(luad_gsboth_kins)
+#fig_1_tab[8, 2] <- length(ov_gsboth_kins)
+#fig_1_tab[9, 2] <- length(pdac_gsboth_kins)
+fig_1_tab[7,2] <- length(ucec_gsboth_kins)
+
+fig_1_tab[1, 3] <- sum(lengths(brca_GS_pos))
+fig_1_tab[2, 3] <- sum(lengths(ccrcc_GS_pos))
+#fig_1_tab[3, 3] <- sum(lengths(coad_GS_pos))
+fig_1_tab[3, 3] <- sum(lengths(gbm_GS_pos))
# Helpers for the remaining columns of the seven-cancer table. They are defined
# here so these statements do not rely on helper definitions made elsewhere.
fig1_fetch <- function(cancers, suffix) {
  lapply(paste0(cancers, suffix), function(obj_name) get(obj_name))
}
fig1_pair_count <- function(gs_set) sum(lengths(gs_set))
fig1_collapse <- function(kinases) paste0(kinases, collapse = ",")

# Row order of the seven-cancer table (COAD, OV and PDAC are left out).
fig1_subset <- c("brca", "ccrcc", "gbm", "hnscc", "lscc", "luad", "ucec")
fig1_subset_rows <- seq_along(fig1_subset)

# Column 3, rows 4-7 (HNSCC, LSCC, LUAD, UCEC): GS+ pair counts.
fig_1_tab[4:7, 3] <- vapply(
  fig1_fetch(fig1_subset[4:7], "_GS_pos"), fig1_pair_count, integer(1)
)
# Column 4: GS- pair counts.
fig_1_tab[fig1_subset_rows, 4] <- vapply(
  fig1_fetch(fig1_subset, "_GS_neg"), fig1_pair_count, integer(1)
)
# Columns 5-7: kinase count and GS+ / GS- pair counts from the *_20per objects.
fig_1_tab[fig1_subset_rows, 5] <- vapply(
  fig1_fetch(fig1_subset, "_gsboth_kins_20per"), length, integer(1)
)
fig_1_tab[fig1_subset_rows, 6] <- vapply(
  fig1_fetch(fig1_subset, "_GS_pos_20per"), fig1_pair_count, integer(1)
)
fig_1_tab[fig1_subset_rows, 7] <- vapply(
  fig1_fetch(fig1_subset, "_GS_neg_20per"), fig1_pair_count, integer(1)
)

# "Total" row: column sums, except that the two kinase columns are replaced by
# the number of unique kinases across the seven cancer types.
fig_1_tab <- rbind(fig_1_tab, colSums(fig_1_tab))
rownames(fig_1_tab)[nrow(fig_1_tab)] <- "Total"
all_kins <- unique(do.call(c, fig1_fetch(fig1_subset, "_gsboth_kins")))
fig_1_tab[8, 2] <- length(all_kins)
all_kins20 <- unique(do.call(c, fig1_fetch(fig1_subset, "_gsboth_kins_20per")))
fig_1_tab[8, 5] <- length(all_kins20)

# Data frame so the comma-separated kinase lists can be stored as text.
fig_1_tab <- as.data.frame(fig_1_tab)
fig_1_tab$`Kinase List (5%)` <- NA
fig_1_tab[fig1_subset_rows, 8] <- vapply(
  fig1_fetch(fig1_subset, "_gsboth_kins"), fig1_collapse, character(1)
)
fig_1_tab[8, 8] <- fig1_collapse(all_kins)

fig_1_tab$`Kinase List (20%)` <- NA
fig_1_tab[fig1_subset_rows, 9] <- vapply(
  fig1_fetch(fig1_subset, "_gsboth_kins_20per"), fig1_collapse, character(1)
)
fig_1_tab[8, 9] <- fig1_collapse(all_kins20)
```

```{r}
# Write the summary table with the row names exposed as a `cancer_type` column.
fig1_export <- cbind(cancer_type = rownames(fig_1_tab), fig_1_tab)
write.table(
  fig1_export,
  "kinase_GSsets/GS_tumor_kinase_pair_summary_v6.tab",
  sep = "\t", quote = FALSE, row.names = FALSE
)
```

save GS set lists (rds)
```{r}
# Each object <name> is written to kinase_GSsets/<name>_set_v6.rds.
gs_all_cancers <- c("brca", "ccrcc", "coad", "gbm", "hnscc",
                    "lscc", "luad", "ov", "pdac", "ucec")
gs_first_batch <- c(
  # <cancer>_GS_pos / <cancer>_GS_neg for all ten cancer types
  paste0(rep(gs_all_cancers, each = 2), c("_GS_pos", "_GS_neg")),
  # first three *_20per sets; the rest are saved by the statements that follow
  "brca_GS_pos_20per", "brca_GS_neg_20per", "ccrcc_GS_pos_20per"
)
for (gs_name in gs_first_batch) {
  saveRDS(get(gs_name), paste0("kinase_GSsets/", gs_name, "_set_v6.rds"))
}
# Continue saving the gold-standard sets: object <name> goes to
# kinase_GSsets/<name>_set_v6.rds.
gs_pos_neg_names <- function(cancers, suffix) {
  # Yields <cancer>_GS_pos<suffix>, <cancer>_GS_neg<suffix> for each cancer in turn.
  paste0(rep(cancers, each = 2), c("_GS_pos", "_GS_neg"), suffix)
}
gs_second_batch <- c(
  "ccrcc_GS_neg_20per",
  gs_pos_neg_names(
    c("coad", "gbm", "hnscc", "lscc", "luad", "ov", "pdac", "ucec"), "_20per"
  ),
  gs_pos_neg_names(c("brca", "ccrcc", "coad", "gbm", "hnscc"), "_2per")
)
for (gs_name in gs_second_batch) {
  saveRDS(get(gs_name), paste0("kinase_GSsets/", gs_name, "_set_v6.rds"))
}
# Remaining gold-standard sets: the rest of the 2% sets, then all 1% and 10%
# sets. Object <name> is written to kinase_GSsets/<name>_set_v6.rds.
gs_cancers <- c("brca", "ccrcc", "coad", "gbm", "hnscc",
                "lscc", "luad", "ov", "pdac", "ucec")
gs_pos_neg <- function(cancers, suffix) {
  paste0(rep(cancers, each = 2), c("_GS_pos", "_GS_neg"), suffix)
}
gs_last_batch <- c(
  gs_pos_neg(gs_cancers[6:10], "_2per"),  # lscc, luad, ov, pdac, ucec
  gs_pos_neg(gs_cancers, "_1per"),
  gs_pos_neg(gs_cancers, "_10per")
)
for (gs_name in gs_last_batch) {
  saveRDS(get(gs_name), paste0("kinase_GSsets/", gs_name, "_set_v6.rds"))
}
```

### Correlation of activating sites with host kinase protein levels >> how much more information do we gain by considering site level data vs protein level data



```{r}
# Keep the protein rows whose id (text before the first "|" in `idx`) is a
# kinase in `kin_map`, index them by gene name and drop the id columns.
# Prints the number of duplicated gene names as a diagnostic.
kinase_protein_matrix <- function(prot, kin_map = kins_mapped_act1) {
  prot_kins <- prot[sub("\\|.*", "", prot$idx) %in% kin_map$gene, ]
  prot_kins$gene <- kin_map[sub("\\|.*", "", prot_kins$idx), "gene_name"]
  print(sum(duplicated(prot_kins$gene)))
  rownames(prot_kins) <- prot_kins$gene
  prot_kins[, !colnames(prot_kins) %in% c("idx", "gene")]
}

# Kinases present in the protein matrix that also own at least one row of the
# site matrix. The kinase is the part of the site row name before the first "x".
kinases_with_sites <- function(prot_kins, site_mat) {
  intersect(rownames(prot_kins), sub("x.*", "", rownames(site_mat)))
}

# Pearson correlation between every site row of `phos_mat` and the protein
# level of its kinase in `prot_mat`, matched on the column names of `phos_mat`.
# A site is kept only if more than `min_pairs` columns have both values.
# Returns a list named by site; it is empty when no site qualifies.
# `rcorr` is presumably Hmisc::rcorr - it must already be attached.
site_protein_corr <- function(phos_mat, prot_mat, kinases, min_pairs = 30) {
  corr <- list()
  # seq_len() so a table without rows is skipped instead of raising
  # "argument is of length zero" as `1:nrow(x)` does.
  for (i in seq_len(nrow(phos_mat))) {
    site <- rownames(phos_mat)[i]
    kinase <- sub("x.*", "", site)
    if (kinase %in% kinases) {
      submat <- cbind(
        as.numeric(phos_mat[i, ]),
        as.numeric(prot_mat[kinase, colnames(phos_mat)])
      )
      if (sum(rowSums(!is.na(submat)) == 2) > min_pairs) {
        corr[[site]] <- rcorr(submat, type = "pearson")$r[1, 2]
      }
    }
  }
  corr
}

# Sites of the given kinases that are NOT in `act_sites`, as a matrix-like table
# with the first and the last two columns removed. Row names are `site`, or
# `site` pasted with `idx` when `idx_in_rownames` is TRUE.
other_site_matrix <- function(phos_kins, kinases, act_sites, idx_in_rownames = TRUE) {
  other <- phos_kins[phos_kins$gene %in% kinases, ]
  other <- other[!other$site %in% act_sites, ]
  rownames(other) <- if (idx_in_rownames) {
    paste0(other$site, other$idx)
  } else {
    other$site
  }
  other[, c(-1, -(ncol(other) - 1), -ncol(other))]
}

brca_prot_kins <- kinase_protein_matrix(brca_prot)
brca_kins <- kinases_with_sites(brca_prot_kins, brca_phos_kins1)
brca_kin_prot_actSite_Pcorr <- site_protein_corr(brca_phos_kins1, brca_prot_kins, brca_kins)
median(unlist(brca_kin_prot_actSite_Pcorr))
```

```{r}
# BRCA, COAD and OV use the bare site as row name; the other cancer types
# append `idx` (kept exactly as in the original per-cancer code).
brca_phos_kins2 <- other_site_matrix(brca_phos_kins, brca_kins, rownames(brca_phos_kins1), idx_in_rownames = FALSE)
brca_kin_prot_nonactSite_Pcorr <- site_protein_corr(brca_phos_kins2, brca_prot_kins, brca_kins)
median(unlist(brca_kin_prot_nonactSite_Pcorr))
```


```{r}
ccrcc_prot_kins <- kinase_protein_matrix(ccrcc_prot)
ccrcc_kins <- kinases_with_sites(ccrcc_prot_kins, ccrcc_phos_kins1)
ccrcc_kin_prot_actSite_Pcorr <- site_protein_corr(ccrcc_phos_kins1, ccrcc_prot_kins, ccrcc_kins)
ccrcc_phos_kins2 <- other_site_matrix(ccrcc_phos_kins, ccrcc_kins, rownames(ccrcc_phos_kins1))
ccrcc_kin_prot_nonactSite_Pcorr <- site_protein_corr(ccrcc_phos_kins2, ccrcc_prot_kins, ccrcc_kins)

coad_prot_kins <- kinase_protein_matrix(coad_prot)
coad_kins <- kinases_with_sites(coad_prot_kins, coad_phos_kins1)
coad_kin_prot_actSite_Pcorr <- site_protein_corr(coad_phos_kins1, coad_prot_kins, coad_kins)
coad_phos_kins2 <- other_site_matrix(coad_phos_kins, coad_kins, rownames(coad_phos_kins1), idx_in_rownames = FALSE)
coad_kin_prot_nonactSite_Pcorr <- site_protein_corr(coad_phos_kins2, coad_prot_kins, coad_kins)

gbm_prot_kins <- kinase_protein_matrix(gbm_prot)
gbm_kins <- kinases_with_sites(gbm_prot_kins, gbm_phos_kins1)
gbm_kin_prot_actSite_Pcorr <- site_protein_corr(gbm_phos_kins1, gbm_prot_kins, gbm_kins)
gbm_phos_kins2 <- other_site_matrix(gbm_phos_kins, gbm_kins, rownames(gbm_phos_kins1))
gbm_kin_prot_nonactSite_Pcorr <- site_protein_corr(gbm_phos_kins2, gbm_prot_kins, gbm_kins)

hnscc_prot_kins <- kinase_protein_matrix(hnscc_prot)
hnscc_kins <- kinases_with_sites(hnscc_prot_kins, hnscc_phos_kins1)
hnscc_kin_prot_actSite_Pcorr <- site_protein_corr(hnscc_phos_kins1, hnscc_prot_kins, hnscc_kins)
hnscc_phos_kins2 <- other_site_matrix(hnscc_phos_kins, hnscc_kins, rownames(hnscc_phos_kins1))
hnscc_kin_prot_nonactSite_Pcorr <- site_protein_corr(hnscc_phos_kins2, hnscc_prot_kins, hnscc_kins)

lscc_prot_kins <- kinase_protein_matrix(lscc_prot)
lscc_kins <- kinases_with_sites(lscc_prot_kins, lscc_phos_kins1)
lscc_kin_prot_actSite_Pcorr <- site_protein_corr(lscc_phos_kins1, lscc_prot_kins, lscc_kins)
lscc_phos_kins2 <- other_site_matrix(lscc_phos_kins, lscc_kins, rownames(lscc_phos_kins1))
lscc_kin_prot_nonactSite_Pcorr <- site_protein_corr(lscc_phos_kins2, lscc_prot_kins, lscc_kins)

luad_prot_kins <- kinase_protein_matrix(luad_prot)
luad_kins <- kinases_with_sites(luad_prot_kins, luad_phos_kins1)
luad_kin_prot_actSite_Pcorr <- site_protein_corr(luad_phos_kins1, luad_prot_kins, luad_kins)
luad_phos_kins2 <- other_site_matrix(luad_phos_kins, luad_kins, rownames(luad_phos_kins1))
luad_kin_prot_nonactSite_Pcorr <- site_protein_corr(luad_phos_kins2, luad_prot_kins, luad_kins)

ov_prot_kins <- kinase_protein_matrix(ov_prot)
ov_kins <- kinases_with_sites(ov_prot_kins, ov_phos_kins1)
ov_kin_prot_actSite_Pcorr <- site_protein_corr(ov_phos_kins1, ov_prot_kins, ov_kins)
ov_phos_kins2 <- other_site_matrix(ov_phos_kins, ov_kins, rownames(ov_phos_kins1), idx_in_rownames = FALSE)
ov_kin_prot_nonactSite_Pcorr <- site_protein_corr(ov_phos_kins2, ov_prot_kins, ov_kins)

pdac_prot_kins <- kinase_protein_matrix(pdac_prot)
pdac_kins <- kinases_with_sites(pdac_prot_kins, pdac_phos_kins1)
pdac_kin_prot_actSite_Pcorr <- site_protein_corr(pdac_phos_kins1, pdac_prot_kins, pdac_kins)
pdac_phos_kins2 <- other_site_matrix(pdac_phos_kins, pdac_kins, rownames(pdac_phos_kins1))
pdac_kin_prot_nonactSite_Pcorr <- site_protein_corr(pdac_phos_kins2, pdac_prot_kins, pdac_kins)

ucec_prot_kins <- kinase_protein_matrix(ucec_prot)
ucec_kins <- kinases_with_sites(ucec_prot_kins, ucec_phos_kins1)
ucec_kin_prot_actSite_Pcorr <- site_protein_corr(ucec_phos_kins1, ucec_prot_kins, ucec_kins)
ucec_phos_kins2 <- other_site_matrix(ucec_phos_kins, ucec_kins, rownames(ucec_phos_kins1))
ucec_kin_prot_nonactSite_Pcorr <- site_protein_corr(ucec_phos_kins2, ucec_prot_kins, ucec_kins)
```

```{r}
par(cex.axis=0.5)
# Per cancer type: activating-site correlations, other-site correlations, then
# a single 0 that acts as a spacer between cancer types.
boxplot(
  unlist(brca_kin_prot_actSite_Pcorr), unlist(brca_kin_prot_nonactSite_Pcorr), 0,
  unlist(ccrcc_kin_prot_actSite_Pcorr), unlist(ccrcc_kin_prot_nonactSite_Pcorr), 0,
  unlist(coad_kin_prot_actSite_Pcorr), unlist(coad_kin_prot_nonactSite_Pcorr), 0,
  unlist(gbm_kin_prot_actSite_Pcorr), unlist(gbm_kin_prot_nonactSite_Pcorr), 0,
  unlist(hnscc_kin_prot_actSite_Pcorr), unlist(hnscc_kin_prot_nonactSite_Pcorr), 0,
  unlist(lscc_kin_prot_actSite_Pcorr), unlist(lscc_kin_prot_nonactSite_Pcorr), 0,
  unlist(luad_kin_prot_actSite_Pcorr), unlist(luad_kin_prot_nonactSite_Pcorr), 0,
  unlist(ov_kin_prot_actSite_Pcorr), unlist(ov_kin_prot_nonactSite_Pcorr), 0,
  unlist(pdac_kin_prot_actSite_Pcorr), unlist(pdac_kin_prot_nonactSite_Pcorr), 0,
  unlist(ucec_kin_prot_actSite_Pcorr), unlist(ucec_kin_prot_nonactSite_Pcorr),
  names = c("BRCA","","","CCRCC","","","COAD","","","GBM","","","HNSCC","","","LSCC","","","LUAD","","","OV","","","PDAC","","","UCEC","")
)
```

combined density plot for Pearson correlations (Fig. 2A)
```{r}
clpal <- c("BRCA"="#fd8cd5",
           "CCRCC"="#ed7711",
           "COAD"="#0728e4",
           "GBM"="#62666b",
           "HNSCC"="#89263b",
           "LSCC"="#cb4763",
           "LUAD"="#d3d3d3",
           "OV"="#107d9d",
           "PDAC"="#b80ec4",
           "UCEC"="#f04688")
# Stack the activating-site correlations of all cancer types into a two-column
# table: V1 = correlation, V2 = cancer type label.
allCT_actSiteCorr <- as.data.frame(rbind(
  cbind(unlist(brca_kin_prot_actSite_Pcorr), "BRCA"),
  cbind(unlist(ccrcc_kin_prot_actSite_Pcorr), "CCRCC"),
  cbind(unlist(coad_kin_prot_actSite_Pcorr), "COAD"),
  cbind(unlist(gbm_kin_prot_actSite_Pcorr), "GBM"),
  cbind(unlist(hnscc_kin_prot_actSite_Pcorr), "HNSCC"),
  cbind(unlist(lscc_kin_prot_actSite_Pcorr), "LSCC"),
  cbind(unlist(luad_kin_prot_actSite_Pcorr), "LUAD"),
  cbind(unlist(ov_kin_prot_actSite_Pcorr), "OV"),
  cbind(unlist(pdac_kin_prot_actSite_Pcorr), "PDAC"),
  cbind(unlist(ucec_kin_prot_actSite_Pcorr), "UCEC")
))
# cbind() with a label turns the values into text; convert them back.
allCT_actSiteCorr$V1 <- as.numeric(allCT_actSiteCorr$V1)
ggplot(allCT_actSiteCorr, aes(x=V1, y=V2, fill=V2)) +
  geom_density_ridges(alpha=0.4)+
  scale_fill_manual(values=clpal)+
  scale_y_discrete(limits=unique(rev(allCT_actSiteCorr$V2)))+
  xlab("Pearson correlation r") +
  ylab("")+
  stat_density_ridges(quantile_lines = TRUE, quantiles = 2, alpha=0.3)+
  #geom_segment(allCT_lines, mapping=aes(x=vline, xend=vline, y=as.numeric(rev(V2)), yend=as.numeric(rev(V2))+1.5), inherit.aes = F)
  theme_bw(base_size = 10) +
  #theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme( axis.text.y=element_text(colour="black"),
         axis.ticks = element_line(colour = "black", size = 0.25),
         plot.title = element_text(hjust = 0.5),
         legend.position = "top",
         legend.direction = "horizontal")
```

```{r}
save.image("KIA_benchmarking_defGSset_activating_site_analysis_v6_ckpt1.Rda")
```


```{r}
# Permutation comparison of the activating-site correlations against randomly
# drawn other phosphosites.
#
# Arguments:
#   phos_protFilt      table with an `idx` column; the pool of candidate sites.
#   prot_actSite_corr  correlations of the activating sites. Its names are
#                      removed from the candidate pool and its length is the
#                      number of sites drawn per permutation.
#   phos_df            phosphosite table with an `idx` column; columns 2..ncol
#                      are used as measurements.
#   prot_df            protein table with an `idx` column and columns named
#                      like the measurement columns of `phos_df`. A site is
#                      matched to the protein whose `idx` equals the site id up
#                      to its first "|".
#   perm               number of permutations.
#
# Returns an unnamed list: [[1]] the median correlation of each permutation,
# [[2]] the fraction of permutations whose median is <= the median of
# `prot_actSite_corr`.
#
# Sites are assumed to be pre-filtered (via `phos_protFilt`) to have enough
# paired measurements; no such check is made here. `rcorr` is presumably
# Hmisc::rcorr and must already be attached.
calcActSiteProtCorrPval <- function(phos_protFilt, prot_actSite_corr, phos_df, prot_df, perm=1000){
  # Loop-invariant values, computed once instead of once per permutation.
  observed_median <- median(as.numeric(prot_actSite_corr), na.rm = TRUE)
  candidate_sites <- setdiff(phos_protFilt$idx, names(prot_actSite_corr))
  n_draw <- length(prot_actSite_corr)
  value_cols <- 2:ncol(phos_df)
  value_names <- colnames(phos_df)[value_cols]

  tot <- 0
  phos_prot_corr_meds <- numeric(perm)
  for (j in seq_len(perm)) {
    # Seeding with the permutation index makes every run draw the same sites
    # (note that this resets the session's RNG state).
    set.seed(j)
    rand_sites <- sample(candidate_sites, n_draw, replace = FALSE)
    phos_prot_corr <- list()
    for (site in rand_sites) {
      submat <- cbind(
        as.numeric(phos_df[phos_df$idx == site, value_cols]),
        as.numeric(prot_df[prot_df$idx == sub("\\|.*", "", site), value_names])
      )
      phos_prot_corr[[site]] <- rcorr(submat, type = "pearson")$r[1, 2]
    }
    phos_prot_corr_meds[j] <- median(as.numeric(phos_prot_corr), na.rm = TRUE)
    if (phos_prot_corr_meds[j] <= observed_median) {
      tot <- tot + 1
    }
  }
  # Divide by the number of permutations rather than the leftover loop index.
  list(phos_prot_corr_meds, tot / perm)
}
```


```{r warning=FALSE}
# Drop phosphosites with fewer than 30 samples in which both the site and its
# protein are measured.
# NOTE(review): the correlation loops above require MORE than 30 paired values,
# so a site with exactly 30 passes this filter but not that check - confirm.
brca_phos_protFilt <- brca_phos
for(i in seq_len(nrow(brca_phos))){
  submat <- cbind(as.numeric(brca_phos[i, 2:ncol(brca_phos)]), as.numeric(brca_prot[brca_prot$idx==sub("\\|.*", "", brca_phos$idx[i]), colnames(brca_phos)[2:ncol(brca_phos)]]))
  if(sum(rowSums(!is.na(submat))==2) < 30){
    brca_phos_protFilt <- brca_phos_protFilt[brca_phos_protFilt$idx != brca_phos$idx[i], ]
  }
}

ccrcc_phos_protFilt <- ccrcc_phos
for(i in 1:nrow(ccrcc_phos)){
  submat <- cbind(as.numeric(ccrcc_phos[i, 2:ncol(ccrcc_phos)]), as.numeric(ccrcc_prot[ccrcc_prot$idx==sub("\\|.*", "", ccrcc_phos$idx[i]), colnames(ccrcc_phos)[2:ncol(ccrcc_phos)]]))
  if(sum(rowSums(!is.na(submat))==2) < 
30){ + ccrcc_phos_protFilt <- ccrcc_phos_protFilt[ccrcc_phos_protFilt$idx != ccrcc_phos$idx[i], ] + } +} + +coad_phos_protFilt <- coad_phos +for(i in 1:nrow(coad_phos)){ + submat <- cbind(as.numeric(coad_phos[i, 2:ncol(coad_phos)]), as.numeric(coad_prot[coad_prot$idx==sub("\\|.*", "", coad_phos$idx[i]), colnames(coad_phos)[2:ncol(coad_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + coad_phos_protFilt <- coad_phos_protFilt[coad_phos_protFilt$idx != coad_phos$idx[i], ] + } +} + +gbm_phos_protFilt <- gbm_phos +for(i in 1:nrow(gbm_phos)){ + submat <- cbind(as.numeric(gbm_phos[i, 2:ncol(gbm_phos)]), as.numeric(gbm_prot[gbm_prot$idx==sub("\\|.*", "", gbm_phos$idx[i]), colnames(gbm_phos)[2:ncol(gbm_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + gbm_phos_protFilt <- gbm_phos_protFilt[gbm_phos_protFilt$idx != gbm_phos$idx[i], ] + } +} + +hnscc_phos_protFilt <- hnscc_phos +for(i in 1:nrow(hnscc_phos)){ + submat <- cbind(as.numeric(hnscc_phos[i, 2:ncol(hnscc_phos)]), as.numeric(hnscc_prot[hnscc_prot$idx==sub("\\|.*", "", hnscc_phos$idx[i]), colnames(hnscc_phos)[2:ncol(hnscc_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + hnscc_phos_protFilt <- hnscc_phos_protFilt[hnscc_phos_protFilt$idx != hnscc_phos$idx[i], ] + } +} + +lscc_phos_protFilt <- lscc_phos +for(i in 1:nrow(lscc_phos)){ + submat <- cbind(as.numeric(lscc_phos[i, 2:ncol(lscc_phos)]), as.numeric(lscc_prot[lscc_prot$idx==sub("\\|.*", "", lscc_phos$idx[i]), colnames(lscc_phos)[2:ncol(lscc_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + lscc_phos_protFilt <- lscc_phos_protFilt[lscc_phos_protFilt$idx != lscc_phos$idx[i], ] + } +} + +luad_phos_protFilt <- luad_phos +for(i in 1:nrow(luad_phos)){ + submat <- cbind(as.numeric(luad_phos[i, 2:ncol(luad_phos)]), as.numeric(luad_prot[luad_prot$idx==sub("\\|.*", "", luad_phos$idx[i]), colnames(luad_phos)[2:ncol(luad_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + luad_phos_protFilt <- luad_phos_protFilt[luad_phos_protFilt$idx != 
luad_phos$idx[i], ] + } +} + +pdac_phos_protFilt <- pdac_phos +for(i in 1:nrow(pdac_phos)){ + submat <- cbind(as.numeric(pdac_phos[i, 2:ncol(pdac_phos)]), as.numeric(pdac_prot[pdac_prot$idx==sub("\\|.*", "", pdac_phos$idx[i]), colnames(pdac_phos)[2:ncol(pdac_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + pdac_phos_protFilt <- pdac_phos_protFilt[pdac_phos_protFilt$idx != pdac_phos$idx[i], ] + } +} + +ov_phos_protFilt <- ov_phos +for(i in 1:nrow(ov_phos)){ + submat <- cbind(as.numeric(ov_phos[i, 2:ncol(ov_phos)]), as.numeric(ov_prot[ov_prot$idx==sub("\\|.*", "", ov_phos$idx[i]), colnames(ov_phos)[2:ncol(ov_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + ov_phos_protFilt <- ov_phos_protFilt[ov_phos_protFilt$idx != ov_phos$idx[i], ] + } +} + +ucec_phos_protFilt <- ucec_phos +for(i in 1:nrow(ucec_phos)){ + submat <- cbind(as.numeric(ucec_phos[i, 2:ncol(ucec_phos)]), as.numeric(ucec_prot[ucec_prot$idx==sub("\\|.*", "", ucec_phos$idx[i]), colnames(ucec_phos)[2:ncol(ucec_phos)]])) + if(sum(rowSums(!is.na(submat))==2) < 30){ + ucec_phos_protFilt <- ucec_phos_protFilt[ucec_phos_protFilt$idx != ucec_phos$idx[i], ] + } +} + +brca_test <- calcActSiteProtCorrPval(phos_protFilt = brca_phos_protFilt, prot_actSite_corr = brca_kin_prot_actSite_Pcorr, phos_df = brca_phos, prot_df = brca_prot, perm = 1000) + +ccrcc_test <- calcActSiteProtCorrPval(phos_protFilt = ccrcc_phos_protFilt, prot_actSite_corr = ccrcc_kin_prot_actSite_Pcorr, phos_df = ccrcc_phos, prot_df = ccrcc_prot) + +coad_test <- calcActSiteProtCorrPval(phos_protFilt = coad_phos_protFilt, prot_actSite_corr = coad_kin_prot_actSite_Pcorr, phos_df = coad_phos, prot_df = coad_prot) + +gbm_test <- calcActSiteProtCorrPval(phos_protFilt = gbm_phos_protFilt, prot_actSite_corr = gbm_kin_prot_actSite_Pcorr, phos_df = gbm_phos, prot_df = gbm_prot) + +hnscc_test <- calcActSiteProtCorrPval(phos_protFilt = hnscc_phos_protFilt, prot_actSite_corr = hnscc_kin_prot_actSite_Pcorr, phos_df = hnscc_phos, prot_df = hnscc_prot) + 
lscc_test <- calcActSiteProtCorrPval(phos_protFilt = lscc_phos_protFilt, prot_actSite_corr = lscc_kin_prot_actSite_Pcorr, phos_df = lscc_phos, prot_df = lscc_prot)

luad_test <- calcActSiteProtCorrPval(phos_protFilt = luad_phos_protFilt, prot_actSite_corr = luad_kin_prot_actSite_Pcorr, phos_df = luad_phos, prot_df = luad_prot)

ov_test <- calcActSiteProtCorrPval(phos_protFilt = ov_phos_protFilt, prot_actSite_corr = ov_kin_prot_actSite_Pcorr, phos_df = ov_phos, prot_df = ov_prot)

pdac_test <- calcActSiteProtCorrPval(phos_protFilt = pdac_phos_protFilt, prot_actSite_corr = pdac_kin_prot_actSite_Pcorr, phos_df = pdac_phos, prot_df = pdac_prot)

ucec_test <- calcActSiteProtCorrPval(phos_protFilt = ucec_phos_protFilt, prot_actSite_corr = ucec_kin_prot_actSite_Pcorr, phos_df = ucec_phos, prot_df = ucec_prot)

save.image("KIA_benchmarking_defGSset_activating_site_analysis_v6_ckpt2.Rda")
```

```{r}
load("KIA_benchmarking_defGSset_activating_site_analysis_v6_ckpt2.Rda")
```


```{r}
# Null distributions: element 1 of each *_test result holds the
# per-permutation median correlations. V1 = median, V2 = cancer type.
allCT_test <- as.data.frame(rbind(
  cbind(brca_test[[1]], "BRCA"),
  cbind(ccrcc_test[[1]], "CCRCC"),
  cbind(coad_test[[1]], "COAD"),
  cbind(gbm_test[[1]], "GBM"),
  cbind(hnscc_test[[1]], "HNSCC"),
  cbind(lscc_test[[1]], "LSCC"),
  cbind(luad_test[[1]], "LUAD"),
  cbind(ov_test[[1]], "OV"),
  cbind(pdac_test[[1]], "PDAC"),
  cbind(ucec_test[[1]], "UCEC")
))
# cbind() with a label produced a character matrix; restore the numeric column.
allCT_test$V1 <- as.numeric(allCT_test$V1)

# Observed medians per cancer type. na.rm = TRUE matches the statistic used
# inside calcActSiteProtCorrPval(); without it one NA correlation would turn
# the median into NA and the marker line would silently disappear.
medianCorr <- function(corr_by_ct) {
  vapply(corr_by_ct, function(x) median(as.numeric(unlist(x)), na.rm = TRUE), numeric(1))
}
actSite_medians <- medianCorr(list(
  BRCA = brca_kin_prot_actSite_Pcorr,
  CCRCC = ccrcc_kin_prot_actSite_Pcorr,
  COAD = coad_kin_prot_actSite_Pcorr,
  GBM = gbm_kin_prot_actSite_Pcorr,
  HNSCC = hnscc_kin_prot_actSite_Pcorr,
  LSCC = lscc_kin_prot_actSite_Pcorr,
  LUAD = luad_kin_prot_actSite_Pcorr,
  OV = ov_kin_prot_actSite_Pcorr,
  PDAC = pdac_kin_prot_actSite_Pcorr,
  UCEC = ucec_kin_prot_actSite_Pcorr
))
nonactSite_medians <- medianCorr(list(
  BRCA = brca_kin_prot_nonactSite_Pcorr,
  CCRCC = ccrcc_kin_prot_nonactSite_Pcorr,
  COAD = coad_kin_prot_nonactSite_Pcorr,
  GBM = gbm_kin_prot_nonactSite_Pcorr,
  HNSCC = hnscc_kin_prot_nonactSite_Pcorr,
  LSCC = lscc_kin_prot_nonactSite_Pcorr,
  LUAD = luad_kin_prot_nonactSite_Pcorr,
  OV = ov_kin_prot_nonactSite_Pcorr,
  PDAC = pdac_kin_prot_nonactSite_Pcorr,
  UCEC = ucec_kin_prot_nonactSite_Pcorr
))

# One row per cancer type: vline = activating sites (red),
# vline2 = non-activating sites (blue). Values are matched by name.
ct_labels <- unique(allCT_test$V2)
allCT_lines <- data.frame(
  vline = unname(actSite_medians[ct_labels]),
  V2 = ct_labels,
  vline2 = unname(nonactSite_medians[ct_labels]),
  stringsAsFactors = FALSE
)
allCT_test$V2 <- as.factor(allCT_test$V2)
allCT_lines$V2 <- as.factor(allCT_lines$V2)

# Bottom-to-top order of the ridges. Segment positions are looked up in this
# vector instead of relying on the alphabetical factor codes.
ct_order <- as.character(unique(rev(allCT_test$V2)))

ggplot(allCT_test, aes(x=V1, y=V2, fill=V2)) +
  geom_density_ridges(alpha=0.6)+
  scale_fill_manual(values=clpal)+
  scale_y_discrete(limits=ct_order)+
  geom_segment(data = allCT_lines,
               mapping = aes(x = vline, xend = vline,
                             y = match(as.character(V2), ct_order),
                             yend = match(as.character(V2), ct_order) + 1),
               inherit.aes = FALSE, colour = "red")+
  geom_segment(data = allCT_lines,
               mapping = aes(x = vline2, xend = vline2,
                             y = match(as.character(V2), ct_order),
                             yend = match(as.character(V2), ct_order) + 1),
               inherit.aes = FALSE, colour = "blue")+
  labs(x="Median Correlation Coefficient", y="")+
  theme_bw(base_size = 10) +
  #theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
  theme( axis.text.y=element_text(colour="black"),
         axis.ticks = element_line(colour = "black", size = 0.25),
         plot.title = element_text(hjust = 0.5),
         legend.position = "top",
         legend.direction = "horizontal")
```

association of activating sites with KSEA scores
read in scores
```{r}
# All score files live in one directory and differ only in the cancer-type tag.
scores_dir <- "../../../Documents/pancan/kinase activity benchmarking"
readScores <- function(ct) {
  readRDS(file.path(scores_dir, paste0("kinase_activity_scores_", ct, "_v5.Rds")))
}

# The earlier version first loaded the BRCA file into coad_scores and then
# overwrote it; that stray assignment is removed.
brca_scores <- readScores("brca")
ccrcc_scores <- readScores("ccrcc")
coad_scores <- readScores("coad")
gbm_scores <- readScores("gbm")
hnscc_scores <- readScores("hnscc")
lscc_scores <- readScores("lscc")
luad_scores <- readScores("luad")
ucec_scores <- readScores("ucec")
ov_scores <- readScores("ov")
pdac_scores <- readScores("pdac")

# Drop the non-sample "fifteenmer" column so that every remaining KSEA column
# is a sample.
# NOTE(review): brca_scores was the only score set not filtered here; it is
# included for consistency, which is a no-op if the column is absent - confirm.
dropFifteenmer <- function(ksea) {
  ksea[ , colnames(ksea) != "fifteenmer"]
}
brca_scores$KSEA <- dropFifteenmer(brca_scores$KSEA)
coad_scores$KSEA <- dropFifteenmer(coad_scores$KSEA)
ccrcc_scores$KSEA <- dropFifteenmer(ccrcc_scores$KSEA)
gbm_scores$KSEA <- dropFifteenmer(gbm_scores$KSEA)
hnscc_scores$KSEA <- dropFifteenmer(hnscc_scores$KSEA)
lscc_scores$KSEA <- dropFifteenmer(lscc_scores$KSEA)
luad_scores$KSEA <- dropFifteenmer(luad_scores$KSEA)
ov_scores$KSEA <- dropFifteenmer(ov_scores$KSEA)
pdac_scores$KSEA <- dropFifteenmer(pdac_scores$KSEA)
ucec_scores$KSEA <- dropFifteenmer(ucec_scores$KSEA)
```


```{r}
# Collect per-sample Z-scores for every kinase that has a KSEA score, at least
# one activating site and a protein measurement.
#
# Arguments
#   ksea       KSEA score table, kinases in rows (row names), samples in columns
#   phos_kins  activating-site table; row names are "<kinase>x<site>"
#   prot_kins  kinase protein table; row names are kinases
#
# Returns a list with
#   kins        the kinases used
#   kseaZ       KSEA Z-scores repeated per activating site, named "<site>_<sample>"
#   act_siteZ   activating-site Z-scores, named "<site>_<sample>"
#   kseaZ_prot  KSEA Z-scores per kinase, named "<kinase>_<sample>"
#   protZ       protein Z-scores per kinase, named "<kinase>_<sample>"
# kseaZ/act_siteZ and kseaZ_prot/protZ are aligned element by element.
collectKseaZ <- function(ksea, phos_kins, prot_kins) {
  site_kinase <- sub("x.*", "", rownames(phos_kins))
  kins <- intersect(intersect(rownames(ksea), site_kinase), rownames(prot_kins))
  samples <- colnames(ksea)

  # scale() returns a one-column matrix; keep only the values.
  zvalues <- function(values) {
    as.vector(scale(as.numeric(values)))
  }
  labelled <- function(z, label) {
    names(z) <- paste0(label, "_", samples)
    z
  }

  per_kinase <- lapply(kins, function(kin) {
    sites <- rownames(phos_kins)[site_kinase == kin]
    # The KSEA Z-scores of a kinase are shared by all of its sites.
    kin_ksea <- zvalues(ksea[kin, ])
    list(
      kseaZ_prot = labelled(kin_ksea, kin),
      protZ = labelled(zvalues(prot_kins[kin, samples]), kin),
      kseaZ = lapply(sites, function(site) labelled(kin_ksea, site)),
      act_siteZ = lapply(sites, function(site) {
        labelled(zvalues(phos_kins[site, samples]), site)
      })
    )
  })

  # Concatenate one field across kinases; c(numeric(), ...) keeps the result
  # a numeric vector even when no kinase qualified.
  flatten <- function(field) {
    c(numeric(), unlist(lapply(per_kinase, function(k) k[[field]])))
  }

  list(
    kins = kins,
    kseaZ = flatten("kseaZ"),
    act_siteZ = flatten("act_siteZ"),
    kseaZ_prot = flatten("kseaZ_prot"),
    protZ = flatten("protZ")
  )
}

brca_z <- collectKseaZ(brca_scores$KSEA, brca_phos_kins1, brca_prot_kins)
brca_ksea_act_site_kins <- brca_z[["kins"]]
brca_kseaZ <- brca_z[["kseaZ"]]
brca_act_siteZ <- brca_z[["act_siteZ"]]
brca_kseaZ_prot <- brca_z[["kseaZ_prot"]]
brca_protZ <- brca_z[["protZ"]]

ccrcc_z <- collectKseaZ(ccrcc_scores$KSEA, ccrcc_phos_kins1, ccrcc_prot_kins)
ccrcc_ksea_act_site_kins <- ccrcc_z[["kins"]]
ccrcc_kseaZ <- ccrcc_z[["kseaZ"]]
ccrcc_act_siteZ <- ccrcc_z[["act_siteZ"]]
ccrcc_kseaZ_prot <- ccrcc_z[["kseaZ_prot"]]
ccrcc_protZ <- ccrcc_z[["protZ"]]

coad_z <- collectKseaZ(coad_scores$KSEA, coad_phos_kins1, coad_prot_kins)
coad_ksea_act_site_kins <- coad_z[["kins"]]
coad_kseaZ <- coad_z[["kseaZ"]]
coad_act_siteZ <- coad_z[["act_siteZ"]]
coad_kseaZ_prot <- coad_z[["kseaZ_prot"]]
coad_protZ <- coad_z[["protZ"]]

gbm_z <- collectKseaZ(gbm_scores$KSEA, gbm_phos_kins1, gbm_prot_kins)
gbm_ksea_act_site_kins <- gbm_z[["kins"]]
gbm_kseaZ <- gbm_z[["kseaZ"]]
gbm_act_siteZ <- gbm_z[["act_siteZ"]]
gbm_kseaZ_prot <- gbm_z[["kseaZ_prot"]]
gbm_protZ <- gbm_z[["protZ"]]

hnscc_z <- collectKseaZ(hnscc_scores$KSEA, hnscc_phos_kins1, hnscc_prot_kins)
hnscc_ksea_act_site_kins <- hnscc_z[["kins"]]
hnscc_kseaZ <- hnscc_z[["kseaZ"]]
hnscc_act_siteZ <- hnscc_z[["act_siteZ"]]
hnscc_kseaZ_prot <- hnscc_z[["kseaZ_prot"]]
hnscc_protZ <- hnscc_z[["protZ"]]

lscc_z <- collectKseaZ(lscc_scores$KSEA, lscc_phos_kins1, lscc_prot_kins)
lscc_ksea_act_site_kins <- lscc_z[["kins"]]
lscc_kseaZ <- lscc_z[["kseaZ"]]
lscc_act_siteZ <- lscc_z[["act_siteZ"]]
lscc_kseaZ_prot <- lscc_z[["kseaZ_prot"]]
lscc_protZ <- lscc_z[["protZ"]]

luad_z <- collectKseaZ(luad_scores$KSEA, luad_phos_kins1, luad_prot_kins)
luad_ksea_act_site_kins <- luad_z[["kins"]]
luad_kseaZ <- luad_z[["kseaZ"]]
luad_act_siteZ <- luad_z[["act_siteZ"]]
luad_kseaZ_prot <- luad_z[["kseaZ_prot"]]
luad_protZ <- luad_z[["protZ"]]

ov_z <- collectKseaZ(ov_scores$KSEA, ov_phos_kins1, ov_prot_kins)
ov_ksea_act_site_kins <- ov_z[["kins"]]
ov_kseaZ <- ov_z[["kseaZ"]]
ov_act_siteZ <- ov_z[["act_siteZ"]]
ov_kseaZ_prot <- ov_z[["kseaZ_prot"]]
ov_protZ <- ov_z[["protZ"]]

pdac_z <- collectKseaZ(pdac_scores$KSEA, pdac_phos_kins1, pdac_prot_kins)
pdac_ksea_act_site_kins <- pdac_z[["kins"]]
pdac_kseaZ <- pdac_z[["kseaZ"]]
pdac_act_siteZ <- pdac_z[["act_siteZ"]]
pdac_kseaZ_prot <- pdac_z[["kseaZ_prot"]]
pdac_protZ <- pdac_z[["protZ"]]

ucec_z <- collectKseaZ(ucec_scores$KSEA, ucec_phos_kins1, ucec_prot_kins)
ucec_ksea_act_site_kins <- ucec_z[["kins"]]
ucec_kseaZ <- ucec_z[["kseaZ"]]
ucec_act_siteZ <- ucec_z[["act_siteZ"]]
ucec_kseaZ_prot <- ucec_z[["kseaZ_prot"]]
ucec_protZ <- ucec_z[["protZ"]]
```

```{r}
# Violin colours: two semi-transparent violins per cancer type (low / high
# outliers) followed by a white spacer.
viocols <- rep(as.character(clpal), each = 2)
viocols <- adjust_transparency(viocols, 0.6)
viocols2 <- viocols
viocols4 <- as.character(rbind("white", clpal))
# the first violin of each pair (low outliers) is drawn lighter
low_pos <- seq(1, length(viocols2), by = 2)
viocols2[low_pos] <- adjust_transparency(viocols2[low_pos], 0.5)
viocols3 <- adjust_transparency(viocols, 0.7)
# append "white" after every low/high pair
viocols5 <- as.character(rbind(matrix(viocols2, nrow = 2), "white"))

# Z-score collections per cancer type, in plotting order.
z_by_ct <- list(
  BRCA = brca_z, CCRCC = ccrcc_z, COAD = coad_z, GBM = gbm_z, HNSCC = hnscc_z,
  LSCC = lscc_z, LUAD = luad_z, OV = ov_z, PDAC = pdac_z, UCEC = ucec_z
)

# Draw, for every cancer type, the KSEA Z-scores of the samples whose
# reference Z-score is below -cutoff (first violin) or above +cutoff (second
# violin), followed by a single-value spacer.
#
#   z_by_ct     named list of collectKseaZ() results
#   ksea_field  element holding the KSEA Z-scores ("kseaZ" or "kseaZ_prot")
#   z_field     element holding the reference Z-scores, aligned with ksea_field
#   cutoff      absolute Z-score threshold defining an outlier
#   main        plot title
#   cols        one colour per violin, spacer included
plotOutlierViolins <- function(z_by_ct, ksea_field, z_field, cutoff, main, cols) {
  # Positional, unnamed list: vioplot() takes the data vectors through `x, ...`.
  violins <- vector("list", 3 * length(z_by_ct))
  for (k in seq_along(z_by_ct)) {
    ksea <- z_by_ct[[k]][[ksea_field]]
    z <- z_by_ct[[k]][[z_field]]
    usable <- !is.na(z) & !is.na(ksea)
    violins[[3 * k - 2]] <- ksea[(z < -cutoff) & usable]
    violins[[3 * k - 1]] <- ksea[(z > cutoff) & usable]
    violins[[3 * k]] <- 0
  }
  do.call(vioplot, c(violins, list(
    main = main,
    ylab = "KSEA Z-score",
    # label only the first violin of every triple
    names = as.vector(rbind(names(z_by_ct), "", "")),
    cex.names = 0.3,
    colMed = rep(c("blue", "red", "white"), times = length(z_by_ct)),
    col = cols, border = cols, lineCol = cols, rectCol = cols,
    ylim = c(-3, 4)
  )))
  abline(h=0)
}

# outliers at |Z| > 1.645
plotOutlierViolins(z_by_ct, "kseaZ", "act_siteZ", cutoff = 1.645, main = "activating site outliers", cols = viocols5)

plotOutlierViolins(z_by_ct, "kseaZ_prot", "protZ", cutoff = 1.645, main = "protein outliers", cols = viocols5)
```

```{r}
# same plots with the more permissive cut-off |Z| > 0.842
plotOutlierViolins(z_by_ct, "kseaZ", "act_siteZ", cutoff = 0.842, main = "activating site outliers", cols = viocols5)

plotOutlierViolins(z_by_ct, "kseaZ_prot", "protZ", cutoff = 0.842, main = "protein outliers", cols = viocols5)
```

for each kinase activating site with matching protein data, filter to kinases with both high and low samples for both measurements and calculate difference
between high and low samples; determine if there is a significant difference between sites and proteins for these comparisons
+```{r}
+# BRCA prototype of the comparison that testKSEAdiff() (next chunk) generalises.
+# For every activating site: split its entries into high (Z > 1.645) and low
+# (Z < -1.645), do the same for the matching protein, and record
+# mean(KSEA of high) - mean(KSEA of low) for the site and for the protein.
+brca_act_sites <- unique(sub("_.*", "", names(brca_act_siteZ)))
+brca_ksea_delta_act <- numeric()
+brca_ksea_delta_prot <- numeric()
+for(i in seq_along(brca_act_sites)){
+  actZ <- brca_act_siteZ[sub("_.*", "", names(brca_act_siteZ))==brca_act_sites[i]]
+  actZ_hi <- actZ[actZ > 1.645 & !is.na(actZ)]
+  actZ_lo <- actZ[actZ < -1.645 & !is.na(actZ)]
+  if((length(actZ_hi) > 0) && (length(actZ_lo) > 0)){
+    protZ <- brca_protZ[sub("_.*", "", names(brca_protZ))==sub("x.*","", brca_act_sites[i])]
+    protZ_hi <- protZ[protZ > 1.645 & !is.na(protZ)]
+    protZ_lo <- protZ[protZ < -1.645 & !is.na(protZ)]
+    if((length(protZ_hi) > 0) && (length(protZ_lo) > 0)){
+      ksea_actZ_hi <- brca_kseaZ[names(actZ_hi)]
+      ksea_actZ_hi <- ksea_actZ_hi[!is.na(ksea_actZ_hi)]
+      ksea_actZ_lo <- brca_kseaZ[names(actZ_lo)]
+      ksea_actZ_lo <- ksea_actZ_lo[!is.na(ksea_actZ_lo)]
+      ksea_protZ_hi <- brca_kseaZ_prot[names(protZ_hi)]
+      ksea_protZ_hi <- ksea_protZ_hi[!is.na(ksea_protZ_hi)]
+      ksea_protZ_lo <- brca_kseaZ_prot[names(protZ_lo)]
+      ksea_protZ_lo <- ksea_protZ_lo[!is.na(ksea_protZ_lo)]
+      if((length(ksea_actZ_hi) > 0) && (length(ksea_actZ_lo) > 0) && (length(ksea_protZ_hi) > 0) && (length(ksea_protZ_lo) > 0)){
+        toadd <- mean(ksea_actZ_hi) - mean(ksea_actZ_lo)
+        names(toadd) <- brca_act_sites[i]
+        brca_ksea_delta_act <- c(brca_ksea_delta_act, toadd)
+        toadd <- mean(ksea_protZ_hi) - mean(ksea_protZ_lo)
+        names(toadd) <- sub("x.*","", brca_act_sites[i])
+        brca_ksea_delta_prot <- c(brca_ksea_delta_prot, toadd)
+      }
+    }
+  }
+}
+# several sites can share one protein; keep a single protein delta per name
+brca_ksea_delta_prot <- brca_ksea_delta_prot[!duplicated(names(brca_ksea_delta_prot))]
+wilcox.test(brca_ksea_delta_act, brca_ksea_delta_prot, alternative = "greater")
+```
+
+convert to function for rapid deployment to other cancer types
+```{r}
+# Compare how strongly KSEA scores differ between high and low groups when the
+# groups are defined by activating-site Z-scores versus protein Z-scores.
+#
+# Arguments
+#   act_siteZ   named numeric vector of activating-site Z-scores. The part of
+#               each name before the first "_" is used as the site id; the part
+#               of a site id before the first "x" is used to find its protein.
+#   protZ_ip    named numeric vector of protein Z-scores. The part of each name
+#               before the first "_" is matched against the site's protein.
+#   kseaZ       named vector of KSEA Z-scores, indexed by names(act_siteZ).
+#   kseaZ_prot  named vector of KSEA Z-scores, indexed by names(protZ_ip).
+#
+# A site contributes only if it has at least one entry > 1.645 and one
+# < -1.645, its protein does as well, and all four KSEA groups are non-empty
+# after dropping NA.
+#
+# Returns a named list with
+#   KSEA_difference_for_activating_sites  mean(high) - mean(low) per site
+#   KSEA_difference_for_protein           same per protein, first entry per name
+#   Wilcoxon_test_results                 one-sided unpaired test, sites > proteins
+#   KSEA_difference_for_protein_paired    protein delta repeated once per site
+#   paired_Wilcoxon_test_results          one-sided paired test on site/protein pairs
+testKSEAdiff <- function(act_siteZ, protZ_ip, kseaZ, kseaZ_prot){
+  # name parsing does not depend on the loop index, so do it once
+  site_of_entry <- sub("_.*", "", names(act_siteZ))
+  prot_of_entry <- sub("_.*", "", names(protZ_ip))
+  act_sites <- unique(site_of_entry)
+  ksea_delta_act <- numeric()
+  ksea_delta_prot <- numeric()
+  for(i in seq_along(act_sites)){
+    prot_id <- sub("x.*", "", act_sites[i])
+    actZ <- act_siteZ[site_of_entry == act_sites[i]]
+    actZ_hi <- actZ[actZ > 1.645 & !is.na(actZ)]
+    actZ_lo <- actZ[actZ < -1.645 & !is.na(actZ)]
+    if(length(actZ_hi) == 0 || length(actZ_lo) == 0) next
+    protZ <- protZ_ip[prot_of_entry == prot_id]
+    protZ_hi <- protZ[protZ > 1.645 & !is.na(protZ)]
+    protZ_lo <- protZ[protZ < -1.645 & !is.na(protZ)]
+    if(length(protZ_hi) == 0 || length(protZ_lo) == 0) next
+    ksea_actZ_hi <- kseaZ[names(actZ_hi)]
+    ksea_actZ_hi <- ksea_actZ_hi[!is.na(ksea_actZ_hi)]
+    ksea_actZ_lo <- kseaZ[names(actZ_lo)]
+    ksea_actZ_lo <- ksea_actZ_lo[!is.na(ksea_actZ_lo)]
+    ksea_protZ_hi <- kseaZ_prot[names(protZ_hi)]
+    ksea_protZ_hi <- ksea_protZ_hi[!is.na(ksea_protZ_hi)]
+    ksea_protZ_lo <- kseaZ_prot[names(protZ_lo)]
+    ksea_protZ_lo <- ksea_protZ_lo[!is.na(ksea_protZ_lo)]
+    if(length(ksea_actZ_hi) == 0 || length(ksea_actZ_lo) == 0 || length(ksea_protZ_hi) == 0 || length(ksea_protZ_lo) == 0) next
+    toadd <- mean(ksea_actZ_hi) - mean(ksea_actZ_lo)
+    names(toadd) <- act_sites[i]
+    ksea_delta_act <- c(ksea_delta_act, toadd)
+    toadd <- mean(ksea_protZ_hi) - mean(ksea_protZ_lo)
+    names(toadd) <- prot_id
+    ksea_delta_prot <- c(ksea_delta_prot, toadd)
+  }
+  # keep the site-aligned copy for the paired test, then drop repeated proteins
+  ksea_delta_prot_paired <- ksea_delta_prot
+  ksea_delta_prot <- ksea_delta_prot[!duplicated(names(ksea_delta_prot))]
+  list(
+    KSEA_difference_for_activating_sites = ksea_delta_act,
+    KSEA_difference_for_protein = ksea_delta_prot,
+    Wilcoxon_test_results = wilcox.test(ksea_delta_act, ksea_delta_prot, alternative = "greater"),
+    KSEA_difference_for_protein_paired = ksea_delta_prot_paired,
+    paired_Wilcoxon_test_results = wilcox.test(ksea_delta_act, ksea_delta_prot_paired, alternative = "greater", paired = TRUE)
+  )
+}
+```
+
+test on BRCA and apply to rest
+```{r}
+brca_test2 <- testKSEAdiff(brca_act_siteZ, brca_protZ, brca_kseaZ, brca_kseaZ_prot)
+# sanity check: the function must reproduce the BRCA prototype values
+brca_test2$KSEA_difference_for_activating_sites == brca_ksea_delta_act
+# element name fixed (was KSEA_different_for_protein, which does not exist in
+# the list, so `$` returned NULL and the comparison printed logical(0))
+brca_test2$KSEA_difference_for_protein == brca_ksea_delta_prot
+brca_test2$Wilcoxon_test_results
+
+ccrcc_diff <- testKSEAdiff(ccrcc_act_siteZ, ccrcc_protZ, ccrcc_kseaZ, ccrcc_kseaZ_prot)
+ccrcc_diff$Wilcoxon_test_results
+
+coad_diff <- testKSEAdiff(coad_act_siteZ, coad_protZ, coad_kseaZ, coad_kseaZ_prot)
+coad_diff$Wilcoxon_test_results
+
+gbm_diff <- testKSEAdiff(gbm_act_siteZ, gbm_protZ, gbm_kseaZ, gbm_kseaZ_prot)
+gbm_diff$Wilcoxon_test_results
+
+hnscc_diff <- testKSEAdiff(hnscc_act_siteZ, hnscc_protZ, hnscc_kseaZ, hnscc_kseaZ_prot)
+hnscc_diff$Wilcoxon_test_results
+
+lscc_diff <- testKSEAdiff(lscc_act_siteZ, lscc_protZ, lscc_kseaZ, lscc_kseaZ_prot)
+lscc_diff$Wilcoxon_test_results
+
+luad_diff <- testKSEAdiff(luad_act_siteZ, luad_protZ, luad_kseaZ, luad_kseaZ_prot)
+luad_diff$Wilcoxon_test_results
+
+ov_diff <- testKSEAdiff(ov_act_siteZ, ov_protZ, ov_kseaZ, ov_kseaZ_prot)
+ov_diff$Wilcoxon_test_results
+
+pdac_diff <- testKSEAdiff(pdac_act_siteZ, pdac_protZ, pdac_kseaZ, pdac_kseaZ_prot)
+pdac_diff$Wilcoxon_test_results
+
+ucec_diff <- testKSEAdiff(ucec_act_siteZ, ucec_protZ, ucec_kseaZ, ucec_kseaZ_prot)
+ucec_diff$Wilcoxon_test_results
+```
+
+```{r}
+wilcox.test(c(brca_test2$KSEA_difference_for_activating_sites, ccrcc_diff$KSEA_difference_for_activating_sites, coad_diff$KSEA_difference_for_activating_sites, gbm_diff$KSEA_difference_for_activating_sites, hnscc_diff$KSEA_difference_for_activating_sites, lscc_diff$KSEA_difference_for_activating_sites, luad_diff$KSEA_difference_for_activating_sites, ov_diff$KSEA_difference_for_activating_sites, pdac_diff$KSEA_difference_for_activating_sites, ucec_diff$KSEA_difference_for_activating_sites), c(brca_test2$KSEA_difference_for_protein, ccrcc_diff$KSEA_difference_for_protein, coad_diff$KSEA_difference_for_protein, gbm_diff$KSEA_difference_for_protein,
hnscc_diff$KSEA_difference_for_protein, lscc_diff$KSEA_difference_for_protein, luad_diff$KSEA_difference_for_protein, ov_diff$KSEA_difference_for_protein, pdac_diff$KSEA_difference_for_protein, ucec_diff$KSEA_difference_for_protein))
+
+wilcox.test(c(brca_test2$KSEA_difference_for_activating_sites, ccrcc_diff$KSEA_difference_for_activating_sites, coad_diff$KSEA_difference_for_activating_sites, gbm_diff$KSEA_difference_for_activating_sites, hnscc_diff$KSEA_difference_for_activating_sites, lscc_diff$KSEA_difference_for_activating_sites, luad_diff$KSEA_difference_for_activating_sites, ov_diff$KSEA_difference_for_activating_sites, pdac_diff$KSEA_difference_for_activating_sites, ucec_diff$KSEA_difference_for_activating_sites), c(brca_test2$KSEA_difference_for_protein_paired, ccrcc_diff$KSEA_difference_for_protein_paired, coad_diff$KSEA_difference_for_protein_paired, gbm_diff$KSEA_difference_for_protein_paired, hnscc_diff$KSEA_difference_for_protein_paired, lscc_diff$KSEA_difference_for_protein_paired, luad_diff$KSEA_difference_for_protein_paired, ov_diff$KSEA_difference_for_protein_paired, pdac_diff$KSEA_difference_for_protein_paired, ucec_diff$KSEA_difference_for_protein_paired), paired = TRUE)
+
+vioplot(c(brca_test2$KSEA_difference_for_activating_sites, ccrcc_diff$KSEA_difference_for_activating_sites, coad_diff$KSEA_difference_for_activating_sites, gbm_diff$KSEA_difference_for_activating_sites, hnscc_diff$KSEA_difference_for_activating_sites, lscc_diff$KSEA_difference_for_activating_sites, luad_diff$KSEA_difference_for_activating_sites, ov_diff$KSEA_difference_for_activating_sites, pdac_diff$KSEA_difference_for_activating_sites, ucec_diff$KSEA_difference_for_activating_sites), c(brca_test2$KSEA_difference_for_protein, ccrcc_diff$KSEA_difference_for_protein, coad_diff$KSEA_difference_for_protein, gbm_diff$KSEA_difference_for_protein, hnscc_diff$KSEA_difference_for_protein, lscc_diff$KSEA_difference_for_protein, luad_diff$KSEA_difference_for_protein, ov_diff$KSEA_difference_for_protein, pdac_diff$KSEA_difference_for_protein, ucec_diff$KSEA_difference_for_protein))
+```
+
+```{r}
+# Collapse site-level deltas to one value per protein so that sites and
+# proteins can be compared one-to-one.
+#
+# diff_res is a list as returned by testKSEAdiff(). For every name in
+# diff_res$KSEA_difference_for_protein, the site deltas whose name up to the
+# first "x" equals that protein name are averaged. Returns a numeric vector
+# named and ordered like KSEA_difference_for_protein.
+# `[[` is used instead of `$` to avoid partial matching between the
+# similarly named list elements.
+mean_site_delta_by_kinase <- function(diff_res){
+  site_delta <- diff_res[["KSEA_difference_for_activating_sites"]]
+  site_prot <- sub("x.*", "", names(site_delta))
+  prot_names <- names(diff_res[["KSEA_difference_for_protein"]])
+  vapply(prot_names, function(p) mean(site_delta[site_prot == p]), numeric(1))
+}
+
+# The wilcox.test() calls stay at top level so each result is printed by the
+# notebook exactly as before.
+brca_test2$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(brca_test2)
+wilcox.test(brca_test2$KSEA_difference_for_activating_sites_mean, brca_test2$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(brca_test2$KSEA_difference_for_activating_sites_mean, brca_test2$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+ccrcc_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(ccrcc_diff)
+wilcox.test(ccrcc_diff$KSEA_difference_for_activating_sites_mean, ccrcc_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(ccrcc_diff$KSEA_difference_for_activating_sites_mean, ccrcc_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+coad_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(coad_diff)
+wilcox.test(coad_diff$KSEA_difference_for_activating_sites_mean, coad_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(coad_diff$KSEA_difference_for_activating_sites_mean, coad_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+gbm_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(gbm_diff)
+wilcox.test(gbm_diff$KSEA_difference_for_activating_sites_mean, gbm_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(gbm_diff$KSEA_difference_for_activating_sites_mean, gbm_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+hnscc_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(hnscc_diff)
+wilcox.test(hnscc_diff$KSEA_difference_for_activating_sites_mean, hnscc_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(hnscc_diff$KSEA_difference_for_activating_sites_mean, hnscc_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+lscc_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(lscc_diff)
+wilcox.test(lscc_diff$KSEA_difference_for_activating_sites_mean, lscc_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(lscc_diff$KSEA_difference_for_activating_sites_mean, lscc_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+luad_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(luad_diff)
+wilcox.test(luad_diff$KSEA_difference_for_activating_sites_mean, luad_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(luad_diff$KSEA_difference_for_activating_sites_mean, luad_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+ov_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(ov_diff)
+wilcox.test(ov_diff$KSEA_difference_for_activating_sites_mean, ov_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(ov_diff$KSEA_difference_for_activating_sites_mean, ov_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+pdac_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(pdac_diff)
+wilcox.test(pdac_diff$KSEA_difference_for_activating_sites_mean, pdac_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(pdac_diff$KSEA_difference_for_activating_sites_mean, pdac_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+
+ucec_diff$KSEA_difference_for_activating_sites_mean <- mean_site_delta_by_kinase(ucec_diff)
+wilcox.test(ucec_diff$KSEA_difference_for_activating_sites_mean, ucec_diff$KSEA_difference_for_protein, alternative = "greater")
+wilcox.test(ucec_diff$KSEA_difference_for_activating_sites_mean, ucec_diff$KSEA_difference_for_protein, alternative = "greater", paired = TRUE)
+```
+
+```{r}
+wilcox.test(c(brca_test2$KSEA_difference_for_activating_sites_mean, ccrcc_diff$KSEA_difference_for_activating_sites_mean, coad_diff$KSEA_difference_for_activating_sites_mean, gbm_diff$KSEA_difference_for_activating_sites_mean, hnscc_diff$KSEA_difference_for_activating_sites_mean, lscc_diff$KSEA_difference_for_activating_sites_mean, luad_diff$KSEA_difference_for_activating_sites_mean, ov_diff$KSEA_difference_for_activating_sites_mean, pdac_diff$KSEA_difference_for_activating_sites_mean, ucec_diff$KSEA_difference_for_activating_sites_mean), c(brca_test2$KSEA_difference_for_protein, ccrcc_diff$KSEA_difference_for_protein, coad_diff$KSEA_difference_for_protein, gbm_diff$KSEA_difference_for_protein, hnscc_diff$KSEA_difference_for_protein, lscc_diff$KSEA_difference_for_protein, luad_diff$KSEA_difference_for_protein, ov_diff$KSEA_difference_for_protein, pdac_diff$KSEA_difference_for_protein, ucec_diff$KSEA_difference_for_protein))
+
+wilcox.test(c(brca_test2$KSEA_difference_for_activating_sites_mean, ccrcc_diff$KSEA_difference_for_activating_sites_mean, coad_diff$KSEA_difference_for_activating_sites_mean, gbm_diff$KSEA_difference_for_activating_sites_mean, hnscc_diff$KSEA_difference_for_activating_sites_mean, lscc_diff$KSEA_difference_for_activating_sites_mean, luad_diff$KSEA_difference_for_activating_sites_mean, ov_diff$KSEA_difference_for_activating_sites_mean, pdac_diff$KSEA_difference_for_activating_sites_mean, ucec_diff$KSEA_difference_for_activating_sites_mean), c(brca_test2$KSEA_difference_for_protein, ccrcc_diff$KSEA_difference_for_protein, coad_diff$KSEA_difference_for_protein, gbm_diff$KSEA_difference_for_protein, hnscc_diff$KSEA_difference_for_protein, lscc_diff$KSEA_difference_for_protein, luad_diff$KSEA_difference_for_protein,
ov_diff$KSEA_difference_for_protein, pdac_diff$KSEA_difference_for_protein, ucec_diff$KSEA_difference_for_protein), paired = T)
+
+vioplot(c(brca_test2$KSEA_difference_for_activating_sites_mean, ccrcc_diff$KSEA_difference_for_activating_sites_mean, coad_diff$KSEA_difference_for_activating_sites_mean, gbm_diff$KSEA_difference_for_activating_sites_mean, hnscc_diff$KSEA_difference_for_activating_sites_mean, lscc_diff$KSEA_difference_for_activating_sites_mean, luad_diff$KSEA_difference_for_activating_sites_mean, ov_diff$KSEA_difference_for_activating_sites_mean, pdac_diff$KSEA_difference_for_activating_sites_mean, ucec_diff$KSEA_difference_for_activating_sites_mean), c(brca_test2$KSEA_difference_for_protein, ccrcc_diff$KSEA_difference_for_protein, coad_diff$KSEA_difference_for_protein, gbm_diff$KSEA_difference_for_protein, hnscc_diff$KSEA_difference_for_protein, lscc_diff$KSEA_difference_for_protein, luad_diff$KSEA_difference_for_protein, ov_diff$KSEA_difference_for_protein, pdac_diff$KSEA_difference_for_protein, ucec_diff$KSEA_difference_for_protein))
+```
+
+modified function using defined GS pos and neg pairs instead of all site thresholds
+```{r}
+# Compare KSEA separation between gold-standard (GS) sample sets and
+# protein-level outlier sets, one kinase at a time.
+#
+# Arguments
+#   GS_pos, GS_neg  lists indexed by kinase name; each element is used to index
+#                   the kinase's scaled KSEA scores, whose names are
+#                   colnames(ksea_scores) (presumably sample ids - confirm
+#                   against the code that builds the GS sets).
+#   prot_ip         table with kinases in rows; each row is scaled across
+#                   columns and thresholded at +/- thr.
+#   ksea_scores     table with kinases in rows; each row is scaled across columns.
+#   thr             Z-score cutoff for protein high/low groups (default 1.645).
+#
+# Only kinases present in names(GS_pos), rownames(prot_ip) and
+# rownames(ksea_scores) are used; GS_neg is looked up by the same name.
+# A kinase with an empty high or low group gets NaN as its delta.
+#
+# Returns a named list with
+#   KSEA_pos_sites / KSEA_neg_sites  scaled KSEA scores of GS positives / negatives
+#   KSEA_pos_prots / KSEA_neg_prots  scaled KSEA scores of protein high / low columns
+#   KSEA_difference_for_activating_sites  per kinase, mean(GS pos) - mean(GS neg)
+#   KSEA_difference_for_protein           per kinase, mean(prot high) - mean(prot low)
+#   Wilcoxon_test_results                 one-sided unpaired test, GS > protein
+#   paired_Wilcoxon_test_results          one-sided paired test (paired by kinase)
+# The two difference vectors are named by kinase.
+testKSEAdiff2 <- function(GS_pos, GS_neg, prot_ip, ksea_scores, thr=1.645){
+  kins <- intersect(names(GS_pos), rownames(prot_ip))
+  kins <- intersect(kins, rownames(ksea_scores))
+  if(length(kins) == 0){
+    # 1:length(kins) would otherwise iterate over c(1, 0) and index with NA
+    stop("no kinase is shared by names(GS_pos), rownames(prot_ip) and rownames(ksea_scores)", call. = FALSE)
+  }
+  ksea_pos_act <- numeric()
+  ksea_neg_act <- numeric()
+  ksea_pos_prot <- numeric()
+  ksea_neg_prot <- numeric()
+  # exactly one delta per kinase, so these can be preallocated and named
+  ksea_delta_act <- setNames(numeric(length(kins)), kins)
+  ksea_delta_prot <- setNames(numeric(length(kins)), kins)
+  for(i in seq_along(kins)){
+    kin <- kins[i]
+    # scale() returns a one-column matrix; flatten it before attaching names
+    kseaZ <- as.numeric(scale(as.numeric(ksea_scores[kin, ])))
+    names(kseaZ) <- colnames(ksea_scores)
+    gs_pos_scores <- kseaZ[GS_pos[[kin]]]
+    gs_neg_scores <- kseaZ[GS_neg[[kin]]]
+    ksea_pos_act <- c(ksea_pos_act, gs_pos_scores)
+    ksea_neg_act <- c(ksea_neg_act, gs_neg_scores)
+    ksea_delta_act[i] <- mean(gs_pos_scores, na.rm = TRUE) - mean(gs_neg_scores, na.rm = TRUE)
+    protZ <- as.numeric(scale(as.numeric(prot_ip[kin, ])))
+    names(protZ) <- colnames(prot_ip)
+    protZ_hi <- protZ[protZ > thr & !is.na(protZ)]
+    protZ_lo <- protZ[protZ < -thr & !is.na(protZ)]
+    prot_hi_scores <- kseaZ[names(protZ_hi)]
+    prot_lo_scores <- kseaZ[names(protZ_lo)]
+    ksea_pos_prot <- c(ksea_pos_prot, prot_hi_scores)
+    ksea_neg_prot <- c(ksea_neg_prot, prot_lo_scores)
+    ksea_delta_prot[i] <- mean(prot_hi_scores, na.rm = TRUE) - mean(prot_lo_scores, na.rm = TRUE)
+  }
+  list(
+    KSEA_pos_sites = ksea_pos_act,
+    KSEA_neg_sites = ksea_neg_act,
+    KSEA_pos_prots = ksea_pos_prot,
+    KSEA_neg_prots = ksea_neg_prot,
+    KSEA_difference_for_activating_sites = ksea_delta_act,
+    KSEA_difference_for_protein = ksea_delta_prot,
+    Wilcoxon_test_results = wilcox.test(ksea_delta_act, ksea_delta_prot, alternative = "greater"),
+    paired_Wilcoxon_test_results = wilcox.test(ksea_delta_act, ksea_delta_prot, alternative = "greater", paired = TRUE)
+  )
+}
+```
+
+```{r}
+brca_diff2 <- testKSEAdiff2(brca_GS_pos, brca_GS_neg, brca_prot_kins, brca_scores$KSEA)
+boxplot(brca_diff2$KSEA_pos_sites, brca_diff2$KSEA_neg_sites, brca_diff2$KSEA_pos_prots, brca_diff2$KSEA_neg_prots)
+vioplot(brca_diff2$KSEA_difference_for_activating_sites, brca_diff2$KSEA_difference_for_protein)
+```
+
+```{r}
+ccrcc_diff2 <- testKSEAdiff2(ccrcc_GS_pos, ccrcc_GS_neg, ccrcc_prot_kins, ccrcc_scores$KSEA)
+boxplot(ccrcc_diff2$KSEA_pos_sites, ccrcc_diff2$KSEA_neg_sites, ccrcc_diff2$KSEA_pos_prots, ccrcc_diff2$KSEA_neg_prots)
+vioplot(ccrcc_diff2$KSEA_difference_for_activating_sites, ccrcc_diff2$KSEA_difference_for_protein)
+
+coad_diff2 <- testKSEAdiff2(coad_GS_pos, coad_GS_neg, coad_prot_kins, coad_scores$KSEA)
+boxplot(coad_diff2$KSEA_pos_sites, coad_diff2$KSEA_neg_sites, coad_diff2$KSEA_pos_prots, coad_diff2$KSEA_neg_prots)
+vioplot(coad_diff2$KSEA_difference_for_activating_sites, coad_diff2$KSEA_difference_for_protein)
+
+gbm_diff2 <- testKSEAdiff2(gbm_GS_pos, gbm_GS_neg, gbm_prot_kins, gbm_scores$KSEA)
+boxplot(gbm_diff2$KSEA_pos_sites, gbm_diff2$KSEA_neg_sites, gbm_diff2$KSEA_pos_prots, gbm_diff2$KSEA_neg_prots)
+vioplot(gbm_diff2$KSEA_difference_for_activating_sites, gbm_diff2$KSEA_difference_for_protein)
+
+hnscc_diff2 <- testKSEAdiff2(hnscc_GS_pos, hnscc_GS_neg, hnscc_prot_kins, hnscc_scores$KSEA)
+boxplot(hnscc_diff2$KSEA_pos_sites, hnscc_diff2$KSEA_neg_sites, hnscc_diff2$KSEA_pos_prots, hnscc_diff2$KSEA_neg_prots)
+vioplot(hnscc_diff2$KSEA_difference_for_activating_sites, hnscc_diff2$KSEA_difference_for_protein)
+
+lscc_diff2 <- testKSEAdiff2(lscc_GS_pos, lscc_GS_neg, lscc_prot_kins, lscc_scores$KSEA)
+boxplot(lscc_diff2$KSEA_pos_sites, lscc_diff2$KSEA_neg_sites, lscc_diff2$KSEA_pos_prots, lscc_diff2$KSEA_neg_prots)
+vioplot(lscc_diff2$KSEA_difference_for_activating_sites, lscc_diff2$KSEA_difference_for_protein)
+
+luad_diff2 <- testKSEAdiff2(luad_GS_pos, luad_GS_neg, luad_prot_kins, luad_scores$KSEA)
+boxplot(luad_diff2$KSEA_pos_sites, luad_diff2$KSEA_neg_sites, luad_diff2$KSEA_pos_prots, luad_diff2$KSEA_neg_prots)
+vioplot(luad_diff2$KSEA_difference_for_activating_sites, luad_diff2$KSEA_difference_for_protein)
+
+ov_diff2 <- testKSEAdiff2(ov_GS_pos, ov_GS_neg, ov_prot_kins, ov_scores$KSEA)
+boxplot(ov_diff2$KSEA_pos_sites, ov_diff2$KSEA_neg_sites, ov_diff2$KSEA_pos_prots, ov_diff2$KSEA_neg_prots)
+vioplot(ov_diff2$KSEA_difference_for_activating_sites, ov_diff2$KSEA_difference_for_protein)
+
+pdac_diff2 <- testKSEAdiff2(pdac_GS_pos, pdac_GS_neg, pdac_prot_kins, pdac_scores$KSEA)
+boxplot(pdac_diff2$KSEA_pos_sites, pdac_diff2$KSEA_neg_sites, pdac_diff2$KSEA_pos_prots, pdac_diff2$KSEA_neg_prots)
+vioplot(pdac_diff2$KSEA_difference_for_activating_sites, pdac_diff2$KSEA_difference_for_protein)
+
+ucec_diff2 <- testKSEAdiff2(ucec_GS_pos, ucec_GS_neg, ucec_prot_kins, ucec_scores$KSEA)
+boxplot(ucec_diff2$KSEA_pos_sites, ucec_diff2$KSEA_neg_sites, ucec_diff2$KSEA_pos_prots, ucec_diff2$KSEA_neg_prots)
+vioplot(ucec_diff2$KSEA_difference_for_activating_sites, ucec_diff2$KSEA_difference_for_protein)
+```
+
+```{r}
+# Pooled one-sided paired test across all cancer types. This call previously
+# passed `greater=T`, which is not a wilcox.test() argument: it was absorbed by
+# `...` and the test silently ran two-sided.
+wilcox.test(c(brca_diff2$KSEA_difference_for_activating_sites, ccrcc_diff2$KSEA_difference_for_activating_sites, coad_diff2$KSEA_difference_for_activating_sites, gbm_diff2$KSEA_difference_for_activating_sites, hnscc_diff2$KSEA_difference_for_activating_sites, lscc_diff2$KSEA_difference_for_activating_sites, luad_diff2$KSEA_difference_for_activating_sites, ov_diff2$KSEA_difference_for_activating_sites, pdac_diff2$KSEA_difference_for_activating_sites, ucec_diff2$KSEA_difference_for_activating_sites), c(brca_diff2$KSEA_difference_for_protein, ccrcc_diff2$KSEA_difference_for_protein, coad_diff2$KSEA_difference_for_protein, gbm_diff2$KSEA_difference_for_protein, hnscc_diff2$KSEA_difference_for_protein, lscc_diff2$KSEA_difference_for_protein, luad_diff2$KSEA_difference_for_protein, ov_diff2$KSEA_difference_for_protein, pdac_diff2$KSEA_difference_for_protein, ucec_diff2$KSEA_difference_for_protein), alternative = "greater", paired = TRUE)
+
+vioplot(c(brca_diff2$KSEA_difference_for_activating_sites, ccrcc_diff2$KSEA_difference_for_activating_sites, coad_diff2$KSEA_difference_for_activating_sites, gbm_diff2$KSEA_difference_for_activating_sites, hnscc_diff2$KSEA_difference_for_activating_sites, lscc_diff2$KSEA_difference_for_activating_sites, luad_diff2$KSEA_difference_for_activating_sites, ov_diff2$KSEA_difference_for_activating_sites, pdac_diff2$KSEA_difference_for_activating_sites, ucec_diff2$KSEA_difference_for_activating_sites), c(brca_diff2$KSEA_difference_for_protein, ccrcc_diff2$KSEA_difference_for_protein, coad_diff2$KSEA_difference_for_protein, gbm_diff2$KSEA_difference_for_protein, hnscc_diff2$KSEA_difference_for_protein, lscc_diff2$KSEA_difference_for_protein, luad_diff2$KSEA_difference_for_protein, ov_diff2$KSEA_difference_for_protein, pdac_diff2$KSEA_difference_for_protein, ucec_diff2$KSEA_difference_for_protein))
+```
+
+```{r}
+ks.test(brca_kseaZ[(brca_act_siteZ < -1.645) & !is.na(brca_act_siteZ) & !is.na(brca_kseaZ)],
brca_kseaZ[(brca_act_siteZ > 1.645) & !is.na(brca_act_siteZ) & !is.na(brca_kseaZ)], alternative = "greater")
+# Two-sample KS tests per cancer type: x is the KSEA Z-scores of the low group
+# (activating-site Z < -1.645), y those of the high group (Z > 1.645).
+# Per the ks.test() documentation, alternative = "greater" is the alternative
+# that the CDF of x lies above that of y, i.e. x tends to be smaller than y.
+ks.test(ccrcc_kseaZ[(ccrcc_act_siteZ < -1.645) & !is.na(ccrcc_act_siteZ) & !is.na(ccrcc_kseaZ)], ccrcc_kseaZ[(ccrcc_act_siteZ > 1.645) & !is.na(ccrcc_act_siteZ) & !is.na(ccrcc_kseaZ)], alternative = "greater")
+ks.test(coad_kseaZ[(coad_act_siteZ < -1.645) & !is.na(coad_act_siteZ) & !is.na(coad_kseaZ)], coad_kseaZ[(coad_act_siteZ > 1.645) & !is.na(coad_act_siteZ) & !is.na(coad_kseaZ)], alternative = "greater")
+ks.test(gbm_kseaZ[(gbm_act_siteZ < -1.645) & !is.na(gbm_act_siteZ) & !is.na(gbm_kseaZ)], gbm_kseaZ[(gbm_act_siteZ > 1.645) & !is.na(gbm_act_siteZ) & !is.na(gbm_kseaZ)], alternative = "greater")
+ks.test(hnscc_kseaZ[(hnscc_act_siteZ < -1.645) & !is.na(hnscc_act_siteZ) & !is.na(hnscc_kseaZ)], hnscc_kseaZ[(hnscc_act_siteZ > 1.645) & !is.na(hnscc_act_siteZ) & !is.na(hnscc_kseaZ)], alternative = "greater")
+ks.test(lscc_kseaZ[(lscc_act_siteZ < -1.645) & !is.na(lscc_act_siteZ) & !is.na(lscc_kseaZ)], lscc_kseaZ[(lscc_act_siteZ > 1.645) & !is.na(lscc_act_siteZ) & !is.na(lscc_kseaZ)], alternative = "greater")
+ks.test(luad_kseaZ[(luad_act_siteZ < -1.645) & !is.na(luad_act_siteZ) & !is.na(luad_kseaZ)], luad_kseaZ[(luad_act_siteZ > 1.645) & !is.na(luad_act_siteZ) & !is.na(luad_kseaZ)], alternative = "greater")
+ks.test(ov_kseaZ[(ov_act_siteZ < -1.645) & !is.na(ov_act_siteZ) & !is.na(ov_kseaZ)], ov_kseaZ[(ov_act_siteZ > 1.645) & !is.na(ov_act_siteZ) & !is.na(ov_kseaZ)], alternative = "greater")
+ks.test(pdac_kseaZ[(pdac_act_siteZ < -1.645) & !is.na(pdac_act_siteZ) & !is.na(pdac_kseaZ)], pdac_kseaZ[(pdac_act_siteZ > 1.645) & !is.na(pdac_act_siteZ) & !is.na(pdac_kseaZ)], alternative = "greater")
+ks.test(ucec_kseaZ[(ucec_act_siteZ < -1.645) & !is.na(ucec_act_siteZ) & !is.na(ucec_kseaZ)], ucec_kseaZ[(ucec_act_siteZ > 1.645) & !is.na(ucec_act_siteZ) & !is.na(ucec_kseaZ)], alternative = "greater")
+```
+
+```{r}
+# Same tests with groups defined by protein Z-score (cutoff +/-1.645).
+ks.test(brca_kseaZ_prot[(brca_protZ < -1.645) & !is.na(brca_protZ) & !is.na(brca_kseaZ_prot)], brca_kseaZ_prot[(brca_protZ > 1.645) & !is.na(brca_protZ) & !is.na(brca_kseaZ_prot)], alternative = "greater")
+ks.test(ccrcc_kseaZ_prot[(ccrcc_protZ < -1.645) & !is.na(ccrcc_protZ) & !is.na(ccrcc_kseaZ_prot)], ccrcc_kseaZ_prot[(ccrcc_protZ > 1.645) & !is.na(ccrcc_protZ) & !is.na(ccrcc_kseaZ_prot)], alternative = "greater")
+ks.test(coad_kseaZ_prot[(coad_protZ < -1.645) & !is.na(coad_protZ) & !is.na(coad_kseaZ_prot)], coad_kseaZ_prot[(coad_protZ > 1.645) & !is.na(coad_protZ) & !is.na(coad_kseaZ_prot)], alternative = "greater")
+ks.test(gbm_kseaZ_prot[(gbm_protZ < -1.645) & !is.na(gbm_protZ) & !is.na(gbm_kseaZ_prot)], gbm_kseaZ_prot[(gbm_protZ > 1.645) & !is.na(gbm_protZ) & !is.na(gbm_kseaZ_prot)], alternative = "greater")
+ks.test(hnscc_kseaZ_prot[(hnscc_protZ < -1.645) & !is.na(hnscc_protZ) & !is.na(hnscc_kseaZ_prot)], hnscc_kseaZ_prot[(hnscc_protZ > 1.645) & !is.na(hnscc_protZ) & !is.na(hnscc_kseaZ_prot)], alternative = "greater")
+ks.test(lscc_kseaZ_prot[(lscc_protZ < -1.645) & !is.na(lscc_protZ) & !is.na(lscc_kseaZ_prot)], lscc_kseaZ_prot[(lscc_protZ > 1.645) & !is.na(lscc_protZ) & !is.na(lscc_kseaZ_prot)], alternative = "greater")
+ks.test(luad_kseaZ_prot[(luad_protZ < -1.645) & !is.na(luad_protZ) & !is.na(luad_kseaZ_prot)], luad_kseaZ_prot[(luad_protZ > 1.645) & !is.na(luad_protZ) & !is.na(luad_kseaZ_prot)], alternative = "greater")
+ks.test(ov_kseaZ_prot[(ov_protZ < -1.645) & !is.na(ov_protZ) & !is.na(ov_kseaZ_prot)], ov_kseaZ_prot[(ov_protZ > 1.645) & !is.na(ov_protZ) & !is.na(ov_kseaZ_prot)], alternative = "greater")
+ks.test(pdac_kseaZ_prot[(pdac_protZ < -1.645) & !is.na(pdac_protZ) & !is.na(pdac_kseaZ_prot)], pdac_kseaZ_prot[(pdac_protZ > 1.645) & !is.na(pdac_protZ) & !is.na(pdac_kseaZ_prot)], alternative = "greater")
+ks.test(ucec_kseaZ_prot[(ucec_protZ < -1.645) & !is.na(ucec_protZ) & !is.na(ucec_kseaZ_prot)], ucec_kseaZ_prot[(ucec_protZ > 1.645) & !is.na(ucec_protZ) & !is.na(ucec_kseaZ_prot)], alternative = "greater")
+```
+
+```{r}
+# Activating-site groups again, with the looser +/-0.842 cutoff.
+ks.test(brca_kseaZ[(brca_act_siteZ < -0.842) & !is.na(brca_act_siteZ) & !is.na(brca_kseaZ)], brca_kseaZ[(brca_act_siteZ > 0.842) & !is.na(brca_act_siteZ) & !is.na(brca_kseaZ)], alternative = "greater")
+ks.test(ccrcc_kseaZ[(ccrcc_act_siteZ < -0.842) & !is.na(ccrcc_act_siteZ) & !is.na(ccrcc_kseaZ)], ccrcc_kseaZ[(ccrcc_act_siteZ > 0.842) & !is.na(ccrcc_act_siteZ) & !is.na(ccrcc_kseaZ)], alternative = "greater")
+ks.test(coad_kseaZ[(coad_act_siteZ < -0.842) & !is.na(coad_act_siteZ) & !is.na(coad_kseaZ)], coad_kseaZ[(coad_act_siteZ > 0.842) & !is.na(coad_act_siteZ) & !is.na(coad_kseaZ)], alternative = "greater")
+ks.test(gbm_kseaZ[(gbm_act_siteZ < -0.842) & !is.na(gbm_act_siteZ) & !is.na(gbm_kseaZ)], gbm_kseaZ[(gbm_act_siteZ > 0.842) & !is.na(gbm_act_siteZ) & !is.na(gbm_kseaZ)], alternative = "greater")
+ks.test(hnscc_kseaZ[(hnscc_act_siteZ < -0.842) & !is.na(hnscc_act_siteZ) & !is.na(hnscc_kseaZ)], hnscc_kseaZ[(hnscc_act_siteZ > 0.842) & !is.na(hnscc_act_siteZ) & !is.na(hnscc_kseaZ)], alternative = "greater")
+ks.test(lscc_kseaZ[(lscc_act_siteZ < -0.842) & !is.na(lscc_act_siteZ) & !is.na(lscc_kseaZ)], lscc_kseaZ[(lscc_act_siteZ > 0.842) & !is.na(lscc_act_siteZ) & !is.na(lscc_kseaZ)], alternative = "greater")
+ks.test(luad_kseaZ[(luad_act_siteZ < -0.842) & !is.na(luad_act_siteZ) & !is.na(luad_kseaZ)], luad_kseaZ[(luad_act_siteZ > 0.842) & !is.na(luad_act_siteZ) & !is.na(luad_kseaZ)], alternative = "greater")
+ks.test(ov_kseaZ[(ov_act_siteZ < -0.842) & !is.na(ov_act_siteZ) & !is.na(ov_kseaZ)], ov_kseaZ[(ov_act_siteZ > 0.842) & !is.na(ov_act_siteZ) & !is.na(ov_kseaZ)], alternative = "greater")
+ks.test(pdac_kseaZ[(pdac_act_siteZ < -0.842) & !is.na(pdac_act_siteZ) & !is.na(pdac_kseaZ)], pdac_kseaZ[(pdac_act_siteZ > 0.842) & !is.na(pdac_act_siteZ) & !is.na(pdac_kseaZ)], alternative = "greater")
+ks.test(ucec_kseaZ[(ucec_act_siteZ < -0.842) & !is.na(ucec_act_siteZ) & !is.na(ucec_kseaZ)], ucec_kseaZ[(ucec_act_siteZ > 0.842) & !is.na(ucec_act_siteZ) & !is.na(ucec_kseaZ)], alternative = "greater")
+```
+
+```{r}
+# Protein groups again, with the looser +/-0.842 cutoff.
+ks.test(brca_kseaZ_prot[(brca_protZ < -0.842) & !is.na(brca_protZ) & !is.na(brca_kseaZ_prot)], brca_kseaZ_prot[(brca_protZ > 0.842) & !is.na(brca_protZ) & !is.na(brca_kseaZ_prot)], alternative = "greater")
+ks.test(ccrcc_kseaZ_prot[(ccrcc_protZ < -0.842) & !is.na(ccrcc_protZ) & !is.na(ccrcc_kseaZ_prot)], ccrcc_kseaZ_prot[(ccrcc_protZ > 0.842) & !is.na(ccrcc_protZ) & !is.na(ccrcc_kseaZ_prot)], alternative = "greater")
+ks.test(coad_kseaZ_prot[(coad_protZ < -0.842) & !is.na(coad_protZ) & !is.na(coad_kseaZ_prot)], coad_kseaZ_prot[(coad_protZ > 0.842) & !is.na(coad_protZ) & !is.na(coad_kseaZ_prot)], alternative = "greater")
+ks.test(gbm_kseaZ_prot[(gbm_protZ < -0.842) & !is.na(gbm_protZ) & !is.na(gbm_kseaZ_prot)], gbm_kseaZ_prot[(gbm_protZ > 0.842) & !is.na(gbm_protZ) & !is.na(gbm_kseaZ_prot)], alternative = "greater")
+ks.test(hnscc_kseaZ_prot[(hnscc_protZ < -0.842) & !is.na(hnscc_protZ) & !is.na(hnscc_kseaZ_prot)], hnscc_kseaZ_prot[(hnscc_protZ > 0.842) & !is.na(hnscc_protZ) & !is.na(hnscc_kseaZ_prot)], alternative = "greater")
+ks.test(lscc_kseaZ_prot[(lscc_protZ < -0.842) & !is.na(lscc_protZ) & !is.na(lscc_kseaZ_prot)], lscc_kseaZ_prot[(lscc_protZ > 0.842) & !is.na(lscc_protZ) & !is.na(lscc_kseaZ_prot)], alternative = "greater")
+ks.test(luad_kseaZ_prot[(luad_protZ < -0.842) & !is.na(luad_protZ) & !is.na(luad_kseaZ_prot)], luad_kseaZ_prot[(luad_protZ > 0.842) & !is.na(luad_protZ) & !is.na(luad_kseaZ_prot)], alternative = "greater")
+ks.test(ov_kseaZ_prot[(ov_protZ < -0.842) & !is.na(ov_protZ) & !is.na(ov_kseaZ_prot)], ov_kseaZ_prot[(ov_protZ > 0.842) & !is.na(ov_protZ) & !is.na(ov_kseaZ_prot)], alternative = "greater")
+ks.test(pdac_kseaZ_prot[(pdac_protZ < -0.842) & !is.na(pdac_protZ) & !is.na(pdac_kseaZ_prot)], pdac_kseaZ_prot[(pdac_protZ > 0.842) & !is.na(pdac_protZ) & !is.na(pdac_kseaZ_prot)], alternative = "greater")
+ks.test(ucec_kseaZ_prot[(ucec_protZ < -0.842) & !is.na(ucec_protZ) & !is.na(ucec_kseaZ_prot)], ucec_kseaZ_prot[(ucec_protZ > 0.842) & !is.na(ucec_protZ) & !is.na(ucec_kseaZ_prot)], alternative = "greater")
+```
+
+```{r}
+# Checkpoint: writes the whole workspace to the v6 file.
+save.image("KIA_benchmarking_defGSset_activating_site_analysis_v6_ckpt_final.Rda")
+```
+
+```{r}
+# NOTE(review): this loads the v5 checkpoint although the chunk above saves v6.
+# load() replaces same-named objects in the workspace - confirm which file is intended.
+load("KIA_benchmarking_defGSset_activating_site_analysis_v5_ckpt_final.Rda")
+```
+
+generate matrix of activating site-KSEA correlations vs protein-KSEA correlations ordered by increasing activating site-protein correlation to determine if sites with poor correlation to host protein show better correlation with KSEA scores
+```{r}
+brca_kin_prot_actSite_Pcorr[order(unlist(brca_kin_prot_actSite_Pcorr))] -> brca_kin_prot_actSite_Pcorr
+brca_act_siteVprot <- matrix(unlist(brca_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(brca_kin_prot_actSite_Pcorr), "site-prot_corr"))
+brca_act_siteVprot <- brca_act_siteVprot[sub("x.*", "", rownames(brca_act_siteVprot)) %in% brca_ksea_act_site_kins, , drop=F]
+brca_act_site_ksea_corr <- list()
+brca_prot_ksea_corr <- list()
+brca_act_siteVprot <- cbind(brca_act_siteVprot, 0, 0) %>% as.data.frame
+brca_act_siteVprot$V4 <- "white"
+brca_act_siteVprot$V5 <- "white"
+for(i in 1:nrow(brca_act_siteVprot)){
+  brca_act_site_ksea_corr[[rownames(brca_act_siteVprot)[i]]] <- rcorr(as.numeric(brca_phos_kins1[rownames(brca_act_siteVprot)[i], ]), scale(as.numeric(brca_scores$KSEA[sub("x.*","",rownames(brca_act_siteVprot)[i]), colnames(brca_phos_kins1)])))
+  brca_act_siteVprot[i, 2] <- brca_act_site_ksea_corr[[rownames(brca_act_siteVprot)[i]]]$r[1,2]
+  if(brca_act_site_ksea_corr[[rownames(brca_act_siteVprot)[i]]]$P[1,2] < 0.05){
+    brca_act_siteVprot[i, 4] <- "red"
+  }
+  brca_prot_ksea_corr[[rownames(brca_act_siteVprot)[i]]] <- rcorr(as.numeric(brca_prot_kins[sub("x.*","",rownames(brca_act_siteVprot)[i]), ]), scale(as.numeric(brca_scores$KSEA[sub("x.*","",rownames(brca_act_siteVprot)[i]), colnames(brca_prot_kins)])))
+  brca_act_siteVprot[i, 3] <-
brca_prot_ksea_corr[[rownames(brca_act_siteVprot)[i]]]$r[1,2] + if(brca_prot_ksea_corr[[rownames(brca_act_siteVprot)[i]]]$P[1,2] < 0.05){ + brca_act_siteVprot[i, 5] <- "blue" + } +} +colnames(brca_act_siteVprot)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +brca_act_siteVprot$diff <- brca_act_siteVprot[,2] - brca_act_siteVprot[,3] +plot(as.numeric(brca_act_siteVprot[,1]), as.numeric(brca_act_siteVprot[,6])) +abline(h=0) +``` + +```{r} +plot(as.numeric(brca_act_siteVprot[,2]), as.numeric(brca_act_siteVprot[,1]), col="red", xlim=c(-0.2, 1), ylim=c(-0.2,1), main="BRCA", pch=21, bg=brca_act_siteVprot$V4) +par(new=T) +plot(as.numeric(brca_act_siteVprot[,3]), as.numeric(brca_act_siteVprot[,1]), col="blue", xlim=c(-0.2, 1), ylim=c(-0.2,1), pch=21, bg=brca_act_siteVprot$V5) +for(i in 1:nrow(brca_act_siteVprot)){ + segments(brca_act_siteVprot[i,2], brca_act_siteVprot[i,1], brca_act_siteVprot[i,3], brca_act_siteVprot[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +brca_act_siteVprot2 <- matrix(unlist(brca_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(brca_kin_prot_actSite_Pcorr), "site-prot_corr")) +brca_act_siteVprot2 <- brca_act_siteVprot2[sub("x.*", "", rownames(brca_act_siteVprot2)) %in% brca_ksea_act_site_kins, , drop=F] +brca_act_site_ksea_corr2 <- list() +brca_prot_ksea_corr2 <- list() +brca_act_siteVprot2 <- cbind(brca_act_siteVprot2, NA, NA) %>% as.data.frame +brca_act_siteVprot2$V4 <- "white" +brca_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(brca_act_siteVprot2)){ + comm_samp <- intersect(colnames(brca_phos_kins1)[!is.na(brca_phos_kins1[rownames(brca_act_siteVprot2)[i], ])], colnames(brca_prot_kins)[!is.na(brca_prot_kins[sub("x.*","",rownames(brca_act_siteVprot2)[i]), ])]) + if(sum(!is.na(brca_scores$KSEA[sub("x.*","",rownames(brca_act_siteVprot2)[i]), comm_samp])) > 29){ + brca_act_site_ksea_corr2[[rownames(brca_act_siteVprot2)[i]]] <- 
rcorr(as.numeric(brca_phos_kins1[rownames(brca_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(brca_scores$KSEA[sub("x.*","",rownames(brca_act_siteVprot2)[i]), comm_samp])))) + brca_act_siteVprot2[i, 2] <- brca_act_site_ksea_corr2[[rownames(brca_act_siteVprot2)[i]]]$r[1,2] + if(brca_act_site_ksea_corr2[[rownames(brca_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + brca_act_siteVprot2[i, 4] <- "red" + } + brca_prot_ksea_corr2[[rownames(brca_act_siteVprot2)[i]]] <- rcorr(as.numeric(brca_prot_kins[sub("x.*","",rownames(brca_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(brca_scores$KSEA[sub("x.*","",rownames(brca_act_siteVprot2)[i]), comm_samp])))) + brca_act_siteVprot2[i, 3] <- brca_prot_ksea_corr2[[rownames(brca_act_siteVprot2)[i]]]$r[1,2] + if(brca_prot_ksea_corr2[[rownames(brca_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + brca_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(brca_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +brca_act_siteVprot2 <- brca_act_siteVprot2[!is.na(brca_act_siteVprot2$`act-site_ksea_corr`), ] +brca_act_siteVprot2$diff <- brca_act_siteVprot2[,2] - brca_act_siteVprot2[,3] +plot(as.numeric(brca_act_siteVprot2[,1]), as.numeric(brca_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(brca_act_siteVprot2[,2]), as.numeric(brca_act_siteVprot2[,1]), col="red", xlim=c(-0.2, 1), ylim=c(-0.2,1), main="BRCA", pch=21, bg=brca_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(brca_act_siteVprot2[,3]), as.numeric(brca_act_siteVprot2[,1]), col="blue", xlim=c(-0.2, 1), ylim=c(-0.2,1), pch=21, bg=brca_act_siteVprot2$V5) +for(i in 1:nrow(brca_act_siteVprot2)){ + segments(brca_act_siteVprot2[i,2], brca_act_siteVprot2[i,1], brca_act_siteVprot2[i,3], brca_act_siteVprot2[i,1]) +} +``` + + +```{r} +coad_kin_prot_actSite_Pcorr[order(unlist(coad_kin_prot_actSite_Pcorr))] -> coad_kin_prot_actSite_Pcorr +coad_act_siteVprot <- matrix(unlist(coad_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(coad_kin_prot_actSite_Pcorr), 
"site-prot_corr")) +coad_act_siteVprot <- coad_act_siteVprot[sub("x.*", "", rownames(coad_act_siteVprot)) %in% coad_ksea_act_site_kins, , drop=F] +coad_act_site_ksea_corr <- list() +coad_prot_ksea_corr <- list() +coad_act_siteVprot <- cbind(coad_act_siteVprot, 0, 0) %>% as.data.frame +coad_act_siteVprot$V4 <- "white" +coad_act_siteVprot$V5 <- "white" +for(i in 1:nrow(coad_act_siteVprot)){ + coad_act_site_ksea_corr[[rownames(coad_act_siteVprot)[i]]] <- rcorr(as.numeric(coad_phos_kins1[rownames(coad_act_siteVprot)[i], ]), scale(as.numeric(coad_scores$KSEA[sub("x.*","",rownames(coad_act_siteVprot)[i]), colnames(coad_phos_kins1)]))) + coad_act_siteVprot[i, 2] <- coad_act_site_ksea_corr[[rownames(coad_act_siteVprot)[i]]]$r[1,2] + if(coad_act_site_ksea_corr[[rownames(coad_act_siteVprot)[i]]]$P[1,2] < 0.05){ + coad_act_siteVprot[i, 4] <- "red" + } + coad_prot_ksea_corr[[rownames(coad_act_siteVprot)[i]]] <- rcorr(as.numeric(coad_prot_kins[sub("x.*","",rownames(coad_act_siteVprot)[i]), ]), scale(as.numeric(coad_scores$KSEA[sub("x.*","",rownames(coad_act_siteVprot)[i]), colnames(coad_prot_kins)]))) + coad_act_siteVprot[i, 3] <- coad_prot_ksea_corr[[rownames(coad_act_siteVprot)[i]]]$r[1,2] + if(coad_prot_ksea_corr[[rownames(coad_act_siteVprot)[i]]]$P[1,2] < 0.05){ + coad_act_siteVprot[i, 5] <- "blue" + } +} +colnames(coad_act_siteVprot)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +coad_act_siteVprot$diff <- coad_act_siteVprot[,2] - coad_act_siteVprot[,3] +plot(as.numeric(coad_act_siteVprot[,1]), as.numeric(coad_act_siteVprot[,6])) +abline(h=0) +``` + +```{r} +plot(as.numeric(coad_act_siteVprot[,2]), as.numeric(coad_act_siteVprot[,1]), col="red", xlim=c(-0.3, 1), ylim=c(-0.2,1), main="COAD", pch=21, bg=coad_act_siteVprot$V4) +par(new=T) +plot(as.numeric(coad_act_siteVprot[,3]), as.numeric(coad_act_siteVprot[,1]), col="blue", xlim=c(-0.3, 1), ylim=c(-0.2,1), pch=21, bg=coad_act_siteVprot$V5) +for(i in 1:nrow(coad_act_siteVprot)){ + segments(coad_act_siteVprot[i,2], 
coad_act_siteVprot[i,1], coad_act_siteVprot[i,3], coad_act_siteVprot[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +coad_act_siteVprot2 <- matrix(unlist(coad_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(coad_kin_prot_actSite_Pcorr), "site-prot_corr")) +coad_act_siteVprot2 <- coad_act_siteVprot2[sub("x.*", "", rownames(coad_act_siteVprot2)) %in% coad_ksea_act_site_kins, , drop=F] +coad_act_site_ksea_corr2 <- list() +coad_prot_ksea_corr2 <- list() +coad_act_siteVprot2 <- cbind(coad_act_siteVprot2, NA, NA) %>% as.data.frame +coad_act_siteVprot2$V4 <- "white" +coad_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(coad_act_siteVprot2)){ + comm_samp <- intersect(colnames(coad_phos_kins1)[!is.na(coad_phos_kins1[rownames(coad_act_siteVprot2)[i], ])], colnames(coad_prot_kins)[!is.na(coad_prot_kins[sub("x.*","",rownames(coad_act_siteVprot2)[i]), ])]) + if(sum(!is.na(coad_scores$KSEA[sub("x.*","",rownames(coad_act_siteVprot2)[i]), comm_samp])) > 29){ + coad_act_site_ksea_corr2[[rownames(coad_act_siteVprot2)[i]]] <- rcorr(as.numeric(coad_phos_kins1[rownames(coad_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(coad_scores$KSEA[sub("x.*","",rownames(coad_act_siteVprot2)[i]), comm_samp])))) + coad_act_siteVprot2[i, 2] <- coad_act_site_ksea_corr2[[rownames(coad_act_siteVprot2)[i]]]$r[1,2] + if(coad_act_site_ksea_corr2[[rownames(coad_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + coad_act_siteVprot2[i, 4] <- "red" + } + coad_prot_ksea_corr2[[rownames(coad_act_siteVprot2)[i]]] <- rcorr(as.numeric(coad_prot_kins[sub("x.*","",rownames(coad_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(coad_scores$KSEA[sub("x.*","",rownames(coad_act_siteVprot2)[i]), comm_samp])))) + coad_act_siteVprot2[i, 3] <- coad_prot_ksea_corr2[[rownames(coad_act_siteVprot2)[i]]]$r[1,2] + if(coad_prot_ksea_corr2[[rownames(coad_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + 
coad_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(coad_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +coad_act_siteVprot2 <- coad_act_siteVprot2[!is.na(coad_act_siteVprot2$`act-site_ksea_corr`), ] +coad_act_siteVprot2$diff <- coad_act_siteVprot2[,2] - coad_act_siteVprot2[,3] +plot(as.numeric(coad_act_siteVprot2[,1]), as.numeric(coad_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(coad_act_siteVprot2[,2]), as.numeric(coad_act_siteVprot2[,1]), col="red", xlim=c(-0.3, 0.8), ylim=c(-0.2,1), main="coad", pch=21, bg=coad_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(coad_act_siteVprot2[,3]), as.numeric(coad_act_siteVprot2[,1]), col="blue", xlim=c(-0.3, 0.8), ylim=c(-0.2,1), pch=21, bg=coad_act_siteVprot2$V5) +for(i in 1:nrow(coad_act_siteVprot2)){ + segments(coad_act_siteVprot2[i,2], coad_act_siteVprot2[i,1], coad_act_siteVprot2[i,3], coad_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +ccrcc_act_siteVprot2 <- matrix(unlist(ccrcc_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(ccrcc_kin_prot_actSite_Pcorr), "site-prot_corr")) +ccrcc_act_siteVprot2 <- ccrcc_act_siteVprot2[sub("x.*", "", rownames(ccrcc_act_siteVprot2)) %in% ccrcc_ksea_act_site_kins, , drop=F] +ccrcc_act_site_ksea_corr2 <- list() +ccrcc_prot_ksea_corr2 <- list() +ccrcc_act_siteVprot2 <- cbind(ccrcc_act_siteVprot2, NA, NA) %>% as.data.frame +ccrcc_act_siteVprot2$V4 <- "white" +ccrcc_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(ccrcc_act_siteVprot2)){ + comm_samp <- intersect(colnames(ccrcc_phos_kins1)[!is.na(ccrcc_phos_kins1[rownames(ccrcc_act_siteVprot2)[i], ])], colnames(ccrcc_prot_kins)[!is.na(ccrcc_prot_kins[sub("x.*","",rownames(ccrcc_act_siteVprot2)[i]), ])]) + if(sum(!is.na(ccrcc_scores$KSEA[sub("x.*","",rownames(ccrcc_act_siteVprot2)[i]), comm_samp])) > 29){ + ccrcc_act_site_ksea_corr2[[rownames(ccrcc_act_siteVprot2)[i]]] <- 
rcorr(as.numeric(ccrcc_phos_kins1[rownames(ccrcc_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(ccrcc_scores$KSEA[sub("x.*","",rownames(ccrcc_act_siteVprot2)[i]), comm_samp])))) + ccrcc_act_siteVprot2[i, 2] <- ccrcc_act_site_ksea_corr2[[rownames(ccrcc_act_siteVprot2)[i]]]$r[1,2] + if(ccrcc_act_site_ksea_corr2[[rownames(ccrcc_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + ccrcc_act_siteVprot2[i, 4] <- "red" + } + ccrcc_prot_ksea_corr2[[rownames(ccrcc_act_siteVprot2)[i]]] <- rcorr(as.numeric(ccrcc_prot_kins[sub("x.*","",rownames(ccrcc_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(ccrcc_scores$KSEA[sub("x.*","",rownames(ccrcc_act_siteVprot2)[i]), comm_samp])))) + ccrcc_act_siteVprot2[i, 3] <- ccrcc_prot_ksea_corr2[[rownames(ccrcc_act_siteVprot2)[i]]]$r[1,2] + if(ccrcc_prot_ksea_corr2[[rownames(ccrcc_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + ccrcc_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(ccrcc_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +ccrcc_act_siteVprot2 <- ccrcc_act_siteVprot2[!is.na(ccrcc_act_siteVprot2$`act-site_ksea_corr`), ] +ccrcc_act_siteVprot2$diff <- ccrcc_act_siteVprot2[,2] - ccrcc_act_siteVprot2[,3] +plot(as.numeric(ccrcc_act_siteVprot2[,1]), as.numeric(ccrcc_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(ccrcc_act_siteVprot2[,2]), as.numeric(ccrcc_act_siteVprot2[,1]), col="red", xlim=c(-0.4, 0.8), ylim=c(-0.2,1), main="ccrcc", pch=21, bg=ccrcc_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(ccrcc_act_siteVprot2[,3]), as.numeric(ccrcc_act_siteVprot2[,1]), col="blue", xlim=c(-0.4, 0.8), ylim=c(-0.2,1), pch=21, bg=ccrcc_act_siteVprot2$V5) +for(i in 1:nrow(ccrcc_act_siteVprot2)){ + segments(ccrcc_act_siteVprot2[i,2], ccrcc_act_siteVprot2[i,1], ccrcc_act_siteVprot2[i,3], ccrcc_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +gbm_act_siteVprot2 <- 
matrix(unlist(gbm_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(gbm_kin_prot_actSite_Pcorr), "site-prot_corr")) +gbm_act_siteVprot2 <- gbm_act_siteVprot2[sub("x.*", "", rownames(gbm_act_siteVprot2)) %in% gbm_ksea_act_site_kins, , drop=F] +gbm_act_site_ksea_corr2 <- list() +gbm_prot_ksea_corr2 <- list() +gbm_act_siteVprot2 <- cbind(gbm_act_siteVprot2, NA, NA) %>% as.data.frame +gbm_act_siteVprot2$V4 <- "white" +gbm_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(gbm_act_siteVprot2)){ + comm_samp <- intersect(colnames(gbm_phos_kins1)[!is.na(gbm_phos_kins1[rownames(gbm_act_siteVprot2)[i], ])], colnames(gbm_prot_kins)[!is.na(gbm_prot_kins[sub("x.*","",rownames(gbm_act_siteVprot2)[i]), ])]) + if(sum(!is.na(gbm_scores$KSEA[sub("x.*","",rownames(gbm_act_siteVprot2)[i]), comm_samp])) > 29){ + gbm_act_site_ksea_corr2[[rownames(gbm_act_siteVprot2)[i]]] <- rcorr(as.numeric(gbm_phos_kins1[rownames(gbm_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(gbm_scores$KSEA[sub("x.*","",rownames(gbm_act_siteVprot2)[i]), comm_samp])))) + gbm_act_siteVprot2[i, 2] <- gbm_act_site_ksea_corr2[[rownames(gbm_act_siteVprot2)[i]]]$r[1,2] + if(gbm_act_site_ksea_corr2[[rownames(gbm_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + gbm_act_siteVprot2[i, 4] <- "red" + } + gbm_prot_ksea_corr2[[rownames(gbm_act_siteVprot2)[i]]] <- rcorr(as.numeric(gbm_prot_kins[sub("x.*","",rownames(gbm_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(gbm_scores$KSEA[sub("x.*","",rownames(gbm_act_siteVprot2)[i]), comm_samp])))) + gbm_act_siteVprot2[i, 3] <- gbm_prot_ksea_corr2[[rownames(gbm_act_siteVprot2)[i]]]$r[1,2] + if(gbm_prot_ksea_corr2[[rownames(gbm_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + gbm_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(gbm_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +gbm_act_siteVprot2 <- gbm_act_siteVprot2[!is.na(gbm_act_siteVprot2$`act-site_ksea_corr`), ] +gbm_act_siteVprot2$diff <- gbm_act_siteVprot2[,2] - gbm_act_siteVprot2[,3] 
+plot(as.numeric(gbm_act_siteVprot2[,1]), as.numeric(gbm_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(gbm_act_siteVprot2[,2]), as.numeric(gbm_act_siteVprot2[,1]), col="red", xlim=c(-0.4, 0.8), ylim=c(-0.2,1), main="gbm", pch=21, bg=gbm_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(gbm_act_siteVprot2[,3]), as.numeric(gbm_act_siteVprot2[,1]), col="blue", xlim=c(-0.4, 0.8), ylim=c(-0.2,1), pch=21, bg=gbm_act_siteVprot2$V5) +for(i in 1:nrow(gbm_act_siteVprot2)){ + segments(gbm_act_siteVprot2[i,2], gbm_act_siteVprot2[i,1], gbm_act_siteVprot2[i,3], gbm_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +hnscc_act_siteVprot2 <- matrix(unlist(hnscc_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(hnscc_kin_prot_actSite_Pcorr), "site-prot_corr")) +hnscc_act_siteVprot2 <- hnscc_act_siteVprot2[sub("x.*", "", rownames(hnscc_act_siteVprot2)) %in% hnscc_ksea_act_site_kins, , drop=F] +hnscc_act_site_ksea_corr2 <- list() +hnscc_prot_ksea_corr2 <- list() +hnscc_act_siteVprot2 <- cbind(hnscc_act_siteVprot2, NA, NA) %>% as.data.frame +hnscc_act_siteVprot2$V4 <- "white" +hnscc_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(hnscc_act_siteVprot2)){ + comm_samp <- intersect(colnames(hnscc_phos_kins1)[!is.na(hnscc_phos_kins1[rownames(hnscc_act_siteVprot2)[i], ])], colnames(hnscc_prot_kins)[!is.na(hnscc_prot_kins[sub("x.*","",rownames(hnscc_act_siteVprot2)[i]), ])]) + if(sum(!is.na(hnscc_scores$KSEA[sub("x.*","",rownames(hnscc_act_siteVprot2)[i]), comm_samp])) > 29){ + hnscc_act_site_ksea_corr2[[rownames(hnscc_act_siteVprot2)[i]]] <- rcorr(as.numeric(hnscc_phos_kins1[rownames(hnscc_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(hnscc_scores$KSEA[sub("x.*","",rownames(hnscc_act_siteVprot2)[i]), comm_samp])))) + hnscc_act_siteVprot2[i, 2] <- hnscc_act_site_ksea_corr2[[rownames(hnscc_act_siteVprot2)[i]]]$r[1,2] + 
if(hnscc_act_site_ksea_corr2[[rownames(hnscc_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + hnscc_act_siteVprot2[i, 4] <- "red" + } + hnscc_prot_ksea_corr2[[rownames(hnscc_act_siteVprot2)[i]]] <- rcorr(as.numeric(hnscc_prot_kins[sub("x.*","",rownames(hnscc_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(hnscc_scores$KSEA[sub("x.*","",rownames(hnscc_act_siteVprot2)[i]), comm_samp])))) + hnscc_act_siteVprot2[i, 3] <- hnscc_prot_ksea_corr2[[rownames(hnscc_act_siteVprot2)[i]]]$r[1,2] + if(hnscc_prot_ksea_corr2[[rownames(hnscc_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + hnscc_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(hnscc_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +hnscc_act_siteVprot2 <- hnscc_act_siteVprot2[!is.na(hnscc_act_siteVprot2$`act-site_ksea_corr`), ] +hnscc_act_siteVprot2$diff <- hnscc_act_siteVprot2[,2] - hnscc_act_siteVprot2[,3] +plot(as.numeric(hnscc_act_siteVprot2[,1]), as.numeric(hnscc_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(hnscc_act_siteVprot2[,2]), as.numeric(hnscc_act_siteVprot2[,1]), col="red", xlim=c(-0.4, 0.8), ylim=c(-0.2,1), main="hnscc", pch=21, bg=hnscc_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(hnscc_act_siteVprot2[,3]), as.numeric(hnscc_act_siteVprot2[,1]), col="blue", xlim=c(-0.4, 0.8), ylim=c(-0.2,1), pch=21, bg=hnscc_act_siteVprot2$V5) +for(i in 1:nrow(hnscc_act_siteVprot2)){ + segments(hnscc_act_siteVprot2[i,2], hnscc_act_siteVprot2[i,1], hnscc_act_siteVprot2[i,3], hnscc_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +lscc_act_siteVprot2 <- matrix(unlist(lscc_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(lscc_kin_prot_actSite_Pcorr), "site-prot_corr")) +lscc_act_siteVprot2 <- lscc_act_siteVprot2[sub("x.*", "", rownames(lscc_act_siteVprot2)) %in% lscc_ksea_act_site_kins, , drop=F] +lscc_act_site_ksea_corr2 <- list() +lscc_prot_ksea_corr2 <- list() 
+lscc_act_siteVprot2 <- cbind(lscc_act_siteVprot2, NA, NA) %>% as.data.frame +lscc_act_siteVprot2$V4 <- "white" +lscc_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(lscc_act_siteVprot2)){ + comm_samp <- intersect(colnames(lscc_phos_kins1)[!is.na(lscc_phos_kins1[rownames(lscc_act_siteVprot2)[i], ])], colnames(lscc_prot_kins)[!is.na(lscc_prot_kins[sub("x.*","",rownames(lscc_act_siteVprot2)[i]), ])]) + if(sum(!is.na(lscc_scores$KSEA[sub("x.*","",rownames(lscc_act_siteVprot2)[i]), comm_samp])) > 29){ + lscc_act_site_ksea_corr2[[rownames(lscc_act_siteVprot2)[i]]] <- rcorr(as.numeric(lscc_phos_kins1[rownames(lscc_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(lscc_scores$KSEA[sub("x.*","",rownames(lscc_act_siteVprot2)[i]), comm_samp])))) + lscc_act_siteVprot2[i, 2] <- lscc_act_site_ksea_corr2[[rownames(lscc_act_siteVprot2)[i]]]$r[1,2] + if(lscc_act_site_ksea_corr2[[rownames(lscc_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + lscc_act_siteVprot2[i, 4] <- "red" + } + lscc_prot_ksea_corr2[[rownames(lscc_act_siteVprot2)[i]]] <- rcorr(as.numeric(lscc_prot_kins[sub("x.*","",rownames(lscc_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(lscc_scores$KSEA[sub("x.*","",rownames(lscc_act_siteVprot2)[i]), comm_samp])))) + lscc_act_siteVprot2[i, 3] <- lscc_prot_ksea_corr2[[rownames(lscc_act_siteVprot2)[i]]]$r[1,2] + if(lscc_prot_ksea_corr2[[rownames(lscc_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + lscc_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(lscc_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +lscc_act_siteVprot2 <- lscc_act_siteVprot2[!is.na(lscc_act_siteVprot2$`act-site_ksea_corr`), ] +lscc_act_siteVprot2$diff <- lscc_act_siteVprot2[,2] - lscc_act_siteVprot2[,3] +plot(as.numeric(lscc_act_siteVprot2[,1]), as.numeric(lscc_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(lscc_act_siteVprot2[,2]), as.numeric(lscc_act_siteVprot2[,1]), col="red", xlim=c(-0.4, 0.9), ylim=c(-0.2,1), main="lscc", pch=21, bg=lscc_act_siteVprot2$V4) +par(new=T) 
+plot(as.numeric(lscc_act_siteVprot2[,3]), as.numeric(lscc_act_siteVprot2[,1]), col="blue", xlim=c(-0.4, 0.9), ylim=c(-0.2,1), pch=21, bg=lscc_act_siteVprot2$V5) +for(i in 1:nrow(lscc_act_siteVprot2)){ + segments(lscc_act_siteVprot2[i,2], lscc_act_siteVprot2[i,1], lscc_act_siteVprot2[i,3], lscc_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +luad_act_siteVprot2 <- matrix(unlist(luad_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(luad_kin_prot_actSite_Pcorr), "site-prot_corr")) +luad_act_siteVprot2 <- luad_act_siteVprot2[sub("x.*", "", rownames(luad_act_siteVprot2)) %in% luad_ksea_act_site_kins, , drop=F] +luad_act_site_ksea_corr2 <- list() +luad_prot_ksea_corr2 <- list() +luad_act_siteVprot2 <- cbind(luad_act_siteVprot2, NA, NA) %>% as.data.frame +luad_act_siteVprot2$V4 <- "white" +luad_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(luad_act_siteVprot2)){ + comm_samp <- intersect(colnames(luad_phos_kins1)[!is.na(luad_phos_kins1[rownames(luad_act_siteVprot2)[i], ])], colnames(luad_prot_kins)[!is.na(luad_prot_kins[sub("x.*","",rownames(luad_act_siteVprot2)[i]), ])]) + if(sum(!is.na(luad_scores$KSEA[sub("x.*","",rownames(luad_act_siteVprot2)[i]), comm_samp])) > 29){ + luad_act_site_ksea_corr2[[rownames(luad_act_siteVprot2)[i]]] <- rcorr(as.numeric(luad_phos_kins1[rownames(luad_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(luad_scores$KSEA[sub("x.*","",rownames(luad_act_siteVprot2)[i]), comm_samp])))) + luad_act_siteVprot2[i, 2] <- luad_act_site_ksea_corr2[[rownames(luad_act_siteVprot2)[i]]]$r[1,2] + if(luad_act_site_ksea_corr2[[rownames(luad_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + luad_act_siteVprot2[i, 4] <- "red" + } + luad_prot_ksea_corr2[[rownames(luad_act_siteVprot2)[i]]] <- rcorr(as.numeric(luad_prot_kins[sub("x.*","",rownames(luad_act_siteVprot2)[i]), comm_samp]), 
as.numeric(scale(as.numeric(luad_scores$KSEA[sub("x.*","",rownames(luad_act_siteVprot2)[i]), comm_samp])))) + luad_act_siteVprot2[i, 3] <- luad_prot_ksea_corr2[[rownames(luad_act_siteVprot2)[i]]]$r[1,2] + if(luad_prot_ksea_corr2[[rownames(luad_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + luad_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(luad_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +luad_act_siteVprot2 <- luad_act_siteVprot2[!is.na(luad_act_siteVprot2$`act-site_ksea_corr`), ] +luad_act_siteVprot2$diff <- luad_act_siteVprot2[,2] - luad_act_siteVprot2[,3] +plot(as.numeric(luad_act_siteVprot2[,1]), as.numeric(luad_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(luad_act_siteVprot2[,2]), as.numeric(luad_act_siteVprot2[,1]), col="red", xlim=c(-0.4, 0.9), ylim=c(-0.2,1), main="luad", pch=21, bg=luad_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(luad_act_siteVprot2[,3]), as.numeric(luad_act_siteVprot2[,1]), col="blue", xlim=c(-0.4, 0.9), ylim=c(-0.2,1), pch=21, bg=luad_act_siteVprot2$V5) +for(i in 1:nrow(luad_act_siteVprot2)){ + segments(luad_act_siteVprot2[i,2], luad_act_siteVprot2[i,1], luad_act_siteVprot2[i,3], luad_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores +```{r} +ov_act_siteVprot2 <- matrix(unlist(ov_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(ov_kin_prot_actSite_Pcorr), "site-prot_corr")) +ov_act_siteVprot2 <- ov_act_siteVprot2[sub("x.*", "", rownames(ov_act_siteVprot2)) %in% ov_ksea_act_site_kins, , drop=F] +ov_act_site_ksea_corr2 <- list() +ov_prot_ksea_corr2 <- list() +ov_act_siteVprot2 <- cbind(ov_act_siteVprot2, NA, NA) %>% as.data.frame +ov_act_siteVprot2$V4 <- "white" +ov_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(ov_act_siteVprot2)){ + comm_samp <- intersect(colnames(ov_phos_kins1)[!is.na(ov_phos_kins1[rownames(ov_act_siteVprot2)[i], ])], 
colnames(ov_prot_kins)[!is.na(ov_prot_kins[sub("x.*","",rownames(ov_act_siteVprot2)[i]), ])]) + if(sum(!is.na(ov_scores$KSEA[sub("x.*","",rownames(ov_act_siteVprot2)[i]), comm_samp])) > 29){ + ov_act_site_ksea_corr2[[rownames(ov_act_siteVprot2)[i]]] <- rcorr(as.numeric(ov_phos_kins1[rownames(ov_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(ov_scores$KSEA[sub("x.*","",rownames(ov_act_siteVprot2)[i]), comm_samp])))) + ov_act_siteVprot2[i, 2] <- ov_act_site_ksea_corr2[[rownames(ov_act_siteVprot2)[i]]]$r[1,2] + if(ov_act_site_ksea_corr2[[rownames(ov_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + ov_act_siteVprot2[i, 4] <- "red" + } + ov_prot_ksea_corr2[[rownames(ov_act_siteVprot2)[i]]] <- rcorr(as.numeric(ov_prot_kins[sub("x.*","",rownames(ov_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(ov_scores$KSEA[sub("x.*","",rownames(ov_act_siteVprot2)[i]), comm_samp])))) + ov_act_siteVprot2[i, 3] <- ov_prot_ksea_corr2[[rownames(ov_act_siteVprot2)[i]]]$r[1,2] + if(ov_prot_ksea_corr2[[rownames(ov_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + ov_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(ov_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +ov_act_siteVprot2 <- ov_act_siteVprot2[!is.na(ov_act_siteVprot2$`act-site_ksea_corr`), ] +ov_act_siteVprot2$diff <- ov_act_siteVprot2[,2] - ov_act_siteVprot2[,3] +plot(as.numeric(ov_act_siteVprot2[,1]), as.numeric(ov_act_siteVprot2[,6])) +abline(h=0) + +plot(as.numeric(ov_act_siteVprot2[,2]), as.numeric(ov_act_siteVprot2[,1]), col="red", xlim=c(-0.6, 0.8), ylim=c(-0.2,1), main="ov", pch=21, bg=ov_act_siteVprot2$V4) +par(new=T) +plot(as.numeric(ov_act_siteVprot2[,3]), as.numeric(ov_act_siteVprot2[,1]), col="blue", xlim=c(-0.6, 0.8), ylim=c(-0.2,1), pch=21, bg=ov_act_siteVprot2$V5) +for(i in 1:nrow(ov_act_siteVprot2)){ + segments(ov_act_siteVprot2[i,2], ov_act_siteVprot2[i,1], ov_act_siteVprot2[i,3], ov_act_siteVprot2[i,1]) +} +``` + +for correlations, we need to focus on samples with data for both site 
and protein; require at least 30 samples with KSEA scores +```{r} +pdac_act_siteVprot2 <- matrix(unlist(pdac_kin_prot_actSite_Pcorr), ncol = 1, dimnames = list(names(pdac_kin_prot_actSite_Pcorr), "site-prot_corr")) +pdac_act_siteVprot2 <- pdac_act_siteVprot2[sub("x.*", "", rownames(pdac_act_siteVprot2)) %in% pdac_ksea_act_site_kins, , drop=F] +pdac_act_site_ksea_corr2 <- list() +pdac_prot_ksea_corr2 <- list() +pdac_act_siteVprot2 <- cbind(pdac_act_siteVprot2, NA, NA) %>% as.data.frame +pdac_act_siteVprot2$V4 <- "white" +pdac_act_siteVprot2$V5 <- "white" +for(i in 1:nrow(pdac_act_siteVprot2)){ + comm_samp <- intersect(colnames(pdac_phos_kins1)[!is.na(pdac_phos_kins1[rownames(pdac_act_siteVprot2)[i], ])], colnames(pdac_prot_kins)[!is.na(pdac_prot_kins[sub("x.*","",rownames(pdac_act_siteVprot2)[i]), ])]) + if(sum(!is.na(pdac_scores$KSEA[sub("x.*","",rownames(pdac_act_siteVprot2)[i]), comm_samp])) > 29){ + pdac_act_site_ksea_corr2[[rownames(pdac_act_siteVprot2)[i]]] <- rcorr(as.numeric(pdac_phos_kins1[rownames(pdac_act_siteVprot2)[i], comm_samp]), as.numeric(scale(as.numeric(pdac_scores$KSEA[sub("x.*","",rownames(pdac_act_siteVprot2)[i]), comm_samp])))) + pdac_act_siteVprot2[i, 2] <- pdac_act_site_ksea_corr2[[rownames(pdac_act_siteVprot2)[i]]]$r[1,2] + if(pdac_act_site_ksea_corr2[[rownames(pdac_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + pdac_act_siteVprot2[i, 4] <- "red" + } + pdac_prot_ksea_corr2[[rownames(pdac_act_siteVprot2)[i]]] <- rcorr(as.numeric(pdac_prot_kins[sub("x.*","",rownames(pdac_act_siteVprot2)[i]), comm_samp]), as.numeric(scale(as.numeric(pdac_scores$KSEA[sub("x.*","",rownames(pdac_act_siteVprot2)[i]), comm_samp])))) + pdac_act_siteVprot2[i, 3] <- pdac_prot_ksea_corr2[[rownames(pdac_act_siteVprot2)[i]]]$r[1,2] + if(pdac_prot_ksea_corr2[[rownames(pdac_act_siteVprot2)[i]]]$P[1,2] < 0.05){ + pdac_act_siteVprot2[i, 5] <- "blue" + } + } +} +colnames(pdac_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr") +pdac_act_siteVprot2 <- 
pdac_act_siteVprot2[!is.na(pdac_act_siteVprot2$`act-site_ksea_corr`), ]
pdac_act_siteVprot2$diff <- pdac_act_siteVprot2[,2] - pdac_act_siteVprot2[,3]
plot(as.numeric(pdac_act_siteVprot2[,1]), as.numeric(pdac_act_siteVprot2[,6]))
abline(h=0)

plot(as.numeric(pdac_act_siteVprot2[,2]), as.numeric(pdac_act_siteVprot2[,1]), col="red", xlim=c(-0.5, 0.8), ylim=c(-0.25,1), main="pdac", pch=21, bg=pdac_act_siteVprot2$V4)
par(new=T)
plot(as.numeric(pdac_act_siteVprot2[,3]), as.numeric(pdac_act_siteVprot2[,1]), col="blue", xlim=c(-0.5, 0.8), ylim=c(-0.25,1), pch=21, bg=pdac_act_siteVprot2$V5)
for(i in 1:nrow(pdac_act_siteVprot2)){
  segments(pdac_act_siteVprot2[i,2], pdac_act_siteVprot2[i,1], pdac_act_siteVprot2[i,3], pdac_act_siteVprot2[i,1])
}
```

for correlations, we need to focus on samples with data for both site and protein; require at least 30 samples with KSEA scores
```{r}
# Draw one cohort's "site vs. protein" comparison.
#
# tbl   : data frame whose columns 1-3 are site-prot_corr, act-site_ksea_corr
#         and prot_ksea_corr, plus fill-colour columns V4 (site) and V5
#         (protein).
# title : plot title.
# xlim, ylim : axis limits.
#
# Red-outlined points are the activating-site/KSEA correlations, blue-outlined
# points the protein/KSEA correlations; a horizontal segment joins the two
# points of each site. points() replaces the former par(new = T) + plot()
# overlay, which redrew the axes and printed two axis labels on top of each
# other. segments() is vectorised, so an empty table draws nothing instead of
# failing inside a 1:nrow() loop.
plot_site_vs_prot <- function(tbl, title, xlim, ylim) {
  site_prot <- as.numeric(tbl[, 1])
  site_ksea <- as.numeric(tbl[, 2])
  prot_ksea <- as.numeric(tbl[, 3])
  plot(site_ksea, site_prot,
       col = "red", bg = tbl$V4, pch = 21,
       xlim = xlim, ylim = ylim, main = title,
       xlab = "correlation with KSEA score",
       ylab = "correlation of activating site with host protein")
  points(prot_ksea, site_prot, col = "blue", bg = tbl$V5, pch = 21)
  segments(site_ksea, site_prot, prot_ksea, site_prot)
  invisible(tbl)
}

ucec_act_siteVprot2 <- matrix(
  unlist(ucec_kin_prot_actSite_Pcorr),
  ncol = 1,
  dimnames = list(names(ucec_kin_prot_actSite_Pcorr), "site-prot_corr")
)
# Row names are site ids; everything from the first "x" onwards is stripped to
# obtain the kinase name used as row name in the protein and KSEA tables.
ucec_act_siteVprot2 <- ucec_act_siteVprot2[
  sub("x.*", "", rownames(ucec_act_siteVprot2)) %in% ucec_ksea_act_site_kins, ,
  drop = FALSE
]
ucec_act_site_ksea_corr2 <- list()
ucec_prot_ksea_corr2 <- list()
# Columns 2/3 start as NA so that sites failing the sample-count filter below
# can be dropped afterwards.
ucec_act_siteVprot2 <- as.data.frame(cbind(ucec_act_siteVprot2, NA, NA))
ucec_act_siteVprot2$V4 <- rep("white", nrow(ucec_act_siteVprot2))
ucec_act_siteVprot2$V5 <- rep("white", nrow(ucec_act_siteVprot2))
# NOTE(review): rcorr() is presumably Hmisc::rcorr; Hmisc is not attached in
# the setup chunk at the top of the notebook - confirm it is loaded earlier.
for (i in seq_len(nrow(ucec_act_siteVprot2))) {
  site_id <- rownames(ucec_act_siteVprot2)[i]
  kinase <- sub("x.*", "", site_id)
  # Samples with a measurement for both the site and its host protein.
  comm_samp <- intersect(
    colnames(ucec_phos_kins1)[!is.na(ucec_phos_kins1[site_id, ])],
    colnames(ucec_prot_kins)[!is.na(ucec_prot_kins[kinase, ])]
  )
  ksea_raw <- ucec_scores$KSEA[kinase, comm_samp]
  # Require at least 30 of those samples to carry a KSEA score.
  if (sum(!is.na(ksea_raw)) > 29) {
    ksea_scaled <- as.numeric(scale(as.numeric(ksea_raw)))

    site_fit <- rcorr(as.numeric(ucec_phos_kins1[site_id, comm_samp]), ksea_scaled)
    ucec_act_site_ksea_corr2[[site_id]] <- site_fit
    ucec_act_siteVprot2[i, 2] <- site_fit$r[1, 2]
    # isTRUE() keeps an NA p-value from aborting the loop.
    if (isTRUE(site_fit$P[1, 2] < 0.05)) {
      ucec_act_siteVprot2[i, 4] <- "red"
    }

    prot_fit <- rcorr(as.numeric(ucec_prot_kins[kinase, comm_samp]), ksea_scaled)
    ucec_prot_ksea_corr2[[site_id]] <- prot_fit
    ucec_act_siteVprot2[i, 3] <- prot_fit$r[1, 2]
    if (isTRUE(prot_fit$P[1, 2] < 0.05)) {
      ucec_act_siteVprot2[i, 5] <- "blue"
    }
  }
}
colnames(ucec_act_siteVprot2)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr")
ucec_act_siteVprot2 <- ucec_act_siteVprot2[!is.na(ucec_act_siteVprot2$`act-site_ksea_corr`), ]
ucec_act_siteVprot2$diff <- ucec_act_siteVprot2[, 2] - ucec_act_siteVprot2[, 3]
plot(as.numeric(ucec_act_siteVprot2[, 1]), as.numeric(ucec_act_siteVprot2[, 6]))
abline(h = 0)

plot_site_vs_prot(ucec_act_siteVprot2, "ucec", xlim = c(-0.4, 0.8), ylim = c(-0.2, 1))
```

```{r}
# Correlate every activating site, and its host protein, with the kinase's
# scaled KSEA score across all samples.
#
# site_prot_corr : named list of site-vs-protein correlations (names = site ids).
# phos, prot     : site x sample and kinase x sample abundance tables.
# ksea           : kinase x sample KSEA score table.
# kinases        : kinases to keep; a site is kept when its id, with everything
#                  from the first "x" removed, is in this set.
#
# Returns a list with
#   table     - data frame: site-prot_corr, act-site_ksea_corr, prot_ksea_corr,
#               V4 ("red" when the site correlation has P < 0.05, else "white"),
#               V5 ("blue" when the protein correlation has P < 0.05, else
#               "white") and diff (column 2 minus column 3);
#   site_ksea - rcorr() results for site vs. KSEA, named by site id;
#   prot_ksea - rcorr() results for protein vs. KSEA, named by site id.
#
# NOTE(review): rcorr() is presumably Hmisc::rcorr; Hmisc is not attached in
# the setup chunk at the top of the notebook - confirm it is loaded earlier.
correlate_site_and_prot_with_ksea <- function(site_prot_corr, phos, prot, ksea, kinases) {
  tbl <- matrix(
    unlist(site_prot_corr),
    ncol = 1,
    dimnames = list(names(site_prot_corr), "site-prot_corr")
  )
  tbl <- tbl[sub("x.*", "", rownames(tbl)) %in% kinases, , drop = FALSE]
  tbl <- as.data.frame(cbind(tbl, 0, 0))
  tbl$V4 <- rep("white", nrow(tbl))
  tbl$V5 <- rep("white", nrow(tbl))

  site_ksea <- list()
  prot_ksea <- list()
  # seq_len() so that an empty table skips the loop (1:0 would iterate twice).
  for (i in seq_len(nrow(tbl))) {
    site_id <- rownames(tbl)[i]
    kinase <- sub("x.*", "", site_id)

    site_fit <- rcorr(
      as.numeric(phos[site_id, ]),
      scale(as.numeric(ksea[kinase, colnames(phos)]))
    )
    site_ksea[[site_id]] <- site_fit
    tbl[i, 2] <- site_fit$r[1, 2]
    # isTRUE() treats an NA p-value as "not significant" instead of raising
    # "missing value where TRUE/FALSE needed".
    if (isTRUE(site_fit$P[1, 2] < 0.05)) {
      tbl[i, 4] <- "red"
    }

    prot_fit <- rcorr(
      as.numeric(prot[kinase, ]),
      scale(as.numeric(ksea[kinase, colnames(prot)]))
    )
    prot_ksea[[site_id]] <- prot_fit
    tbl[i, 3] <- prot_fit$r[1, 2]
    if (isTRUE(prot_fit$P[1, 2] < 0.05)) {
      tbl[i, 5] <- "blue"
    }
  }

  colnames(tbl)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr")
  tbl$diff <- tbl[, 2] - tbl[, 3]
  list(table = tbl, site_ksea = site_ksea, prot_ksea = prot_ksea)
}

ccrcc_kin_prot_actSite_Pcorr <- ccrcc_kin_prot_actSite_Pcorr[order(unlist(ccrcc_kin_prot_actSite_Pcorr))]
# Drop kinases without any KSEA score; the remaining row names define the
# kinases that are kept for CCRCC.
ccrcc_scores$KSEA <- ccrcc_scores$KSEA[rowSums(!is.na(ccrcc_scores$KSEA)) > 0, , drop = FALSE]
ccrcc_fit <- correlate_site_and_prot_with_ksea(
  ccrcc_kin_prot_actSite_Pcorr, ccrcc_phos_kins1, ccrcc_prot_kins,
  ccrcc_scores$KSEA, rownames(ccrcc_scores$KSEA)
)
ccrcc_act_siteVprot <- ccrcc_fit$table
ccrcc_act_site_ksea_corr <- ccrcc_fit$site_ksea
ccrcc_prot_ksea_corr <- ccrcc_fit$prot_ksea
plot(as.numeric(ccrcc_act_siteVprot[, 1]), as.numeric(ccrcc_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(ccrcc_act_siteVprot, "CCRCC", xlim = c(-0.35, 1), ylim = c(-0.2, 1))
```

```{r}
gbm_kin_prot_actSite_Pcorr <- gbm_kin_prot_actSite_Pcorr[order(unlist(gbm_kin_prot_actSite_Pcorr))]
gbm_fit <- correlate_site_and_prot_with_ksea(
  gbm_kin_prot_actSite_Pcorr, gbm_phos_kins1, gbm_prot_kins,
  gbm_scores$KSEA, gbm_ksea_act_site_kins
)
gbm_act_siteVprot <- gbm_fit$table
gbm_act_site_ksea_corr <- gbm_fit$site_ksea
gbm_prot_ksea_corr <- gbm_fit$prot_ksea
plot(as.numeric(gbm_act_siteVprot[, 1]), as.numeric(gbm_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
# Draw one cohort's "site vs. protein" comparison.
#
# tbl   : data frame whose columns 1-3 are site-prot_corr, act-site_ksea_corr
#         and prot_ksea_corr, plus fill-colour columns V4 (site) and V5
#         (protein).
# title : plot title.
# xlim, ylim : axis limits.
#
# Red-outlined points are the activating-site/KSEA correlations, blue-outlined
# points the protein/KSEA correlations; a horizontal segment joins the two
# points of each site. points() replaces the former par(new = T) + plot()
# overlay, which redrew the axes and printed two axis labels on top of each
# other. segments() is vectorised, so an empty table draws nothing instead of
# failing inside a 1:nrow() loop.
plot_site_vs_prot <- function(tbl, title, xlim, ylim) {
  site_prot <- as.numeric(tbl[, 1])
  site_ksea <- as.numeric(tbl[, 2])
  prot_ksea <- as.numeric(tbl[, 3])
  plot(site_ksea, site_prot,
       col = "red", bg = tbl$V4, pch = 21,
       xlim = xlim, ylim = ylim, main = title,
       xlab = "correlation with KSEA score",
       ylab = "correlation of activating site with host protein")
  points(prot_ksea, site_prot, col = "blue", bg = tbl$V5, pch = 21)
  segments(site_ksea, site_prot, prot_ksea, site_prot)
  invisible(tbl)
}

plot_site_vs_prot(gbm_act_siteVprot, "GBM", xlim = c(-0.5, 1), ylim = c(-0.2, 1))
```

```{r}
# Correlate every activating site, and its host protein, with the kinase's
# scaled KSEA score across all samples.
#
# site_prot_corr : named list of site-vs-protein correlations (names = site ids).
# phos, prot     : site x sample and kinase x sample abundance tables.
# ksea           : kinase x sample KSEA score table.
# kinases        : kinases to keep; a site is kept when its id, with everything
#                  from the first "x" removed, is in this set.
#
# Returns a list with
#   table     - data frame: site-prot_corr, act-site_ksea_corr, prot_ksea_corr,
#               V4 ("red" when the site correlation has P < 0.05, else "white"),
#               V5 ("blue" when the protein correlation has P < 0.05, else
#               "white") and diff (column 2 minus column 3);
#   site_ksea - rcorr() results for site vs. KSEA, named by site id;
#   prot_ksea - rcorr() results for protein vs. KSEA, named by site id.
#
# NOTE(review): rcorr() is presumably Hmisc::rcorr; Hmisc is not attached in
# the setup chunk at the top of the notebook - confirm it is loaded earlier.
correlate_site_and_prot_with_ksea <- function(site_prot_corr, phos, prot, ksea, kinases) {
  tbl <- matrix(
    unlist(site_prot_corr),
    ncol = 1,
    dimnames = list(names(site_prot_corr), "site-prot_corr")
  )
  tbl <- tbl[sub("x.*", "", rownames(tbl)) %in% kinases, , drop = FALSE]
  tbl <- as.data.frame(cbind(tbl, 0, 0))
  tbl$V4 <- rep("white", nrow(tbl))
  tbl$V5 <- rep("white", nrow(tbl))

  site_ksea <- list()
  prot_ksea <- list()
  # seq_len() so that an empty table skips the loop (1:0 would iterate twice).
  for (i in seq_len(nrow(tbl))) {
    site_id <- rownames(tbl)[i]
    kinase <- sub("x.*", "", site_id)

    site_fit <- rcorr(
      as.numeric(phos[site_id, ]),
      scale(as.numeric(ksea[kinase, colnames(phos)]))
    )
    site_ksea[[site_id]] <- site_fit
    tbl[i, 2] <- site_fit$r[1, 2]
    # isTRUE() treats an NA p-value as "not significant" instead of raising
    # "missing value where TRUE/FALSE needed".
    if (isTRUE(site_fit$P[1, 2] < 0.05)) {
      tbl[i, 4] <- "red"
    }

    prot_fit <- rcorr(
      as.numeric(prot[kinase, ]),
      scale(as.numeric(ksea[kinase, colnames(prot)]))
    )
    prot_ksea[[site_id]] <- prot_fit
    tbl[i, 3] <- prot_fit$r[1, 2]
    if (isTRUE(prot_fit$P[1, 2] < 0.05)) {
      tbl[i, 5] <- "blue"
    }
  }

  colnames(tbl)[2:3] <- c("act-site_ksea_corr", "prot_ksea_corr")
  tbl$diff <- tbl[, 2] - tbl[, 3]
  list(table = tbl, site_ksea = site_ksea, prot_ksea = prot_ksea)
}

hnscc_kin_prot_actSite_Pcorr <- hnscc_kin_prot_actSite_Pcorr[order(unlist(hnscc_kin_prot_actSite_Pcorr))]
hnscc_fit <- correlate_site_and_prot_with_ksea(
  hnscc_kin_prot_actSite_Pcorr, hnscc_phos_kins1, hnscc_prot_kins,
  hnscc_scores$KSEA, hnscc_ksea_act_site_kins
)
hnscc_act_siteVprot <- hnscc_fit$table
hnscc_act_site_ksea_corr <- hnscc_fit$site_ksea
hnscc_prot_ksea_corr <- hnscc_fit$prot_ksea
plot(as.numeric(hnscc_act_siteVprot[, 1]), as.numeric(hnscc_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(hnscc_act_siteVprot, "HNSCC", xlim = c(-0.4, 1), ylim = c(-0.2, 1))
```

```{r}
lscc_kin_prot_actSite_Pcorr <- lscc_kin_prot_actSite_Pcorr[order(unlist(lscc_kin_prot_actSite_Pcorr))]
lscc_fit <- correlate_site_and_prot_with_ksea(
  lscc_kin_prot_actSite_Pcorr, lscc_phos_kins1, lscc_prot_kins,
  lscc_scores$KSEA, lscc_ksea_act_site_kins
)
lscc_act_siteVprot <- lscc_fit$table
lscc_act_site_ksea_corr <- lscc_fit$site_ksea
lscc_prot_ksea_corr <- lscc_fit$prot_ksea
plot(as.numeric(lscc_act_siteVprot[, 1]), as.numeric(lscc_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(lscc_act_siteVprot, "LSCC", xlim = c(-0.4, 1), ylim = c(-0.2, 1))
```

```{r}
luad_kin_prot_actSite_Pcorr <- luad_kin_prot_actSite_Pcorr[order(unlist(luad_kin_prot_actSite_Pcorr))]
luad_fit <- correlate_site_and_prot_with_ksea(
  luad_kin_prot_actSite_Pcorr, luad_phos_kins1, luad_prot_kins,
  luad_scores$KSEA, luad_ksea_act_site_kins
)
luad_act_siteVprot <- luad_fit$table
luad_act_site_ksea_corr <- luad_fit$site_ksea
luad_prot_ksea_corr <- luad_fit$prot_ksea
plot(as.numeric(luad_act_siteVprot[, 1]), as.numeric(luad_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(luad_act_siteVprot, "LUAD", xlim = c(-0.4, 1), ylim = c(-0.2, 1))
```

```{r}
ov_kin_prot_actSite_Pcorr <- ov_kin_prot_actSite_Pcorr[order(unlist(ov_kin_prot_actSite_Pcorr))]
ov_fit <- correlate_site_and_prot_with_ksea(
  ov_kin_prot_actSite_Pcorr, ov_phos_kins1, ov_prot_kins,
  ov_scores$KSEA, ov_ksea_act_site_kins
)
ov_act_siteVprot <- ov_fit$table
ov_act_site_ksea_corr <- ov_fit$site_ksea
ov_prot_ksea_corr <- ov_fit$prot_ksea
plot(as.numeric(ov_act_siteVprot[, 1]), as.numeric(ov_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(ov_act_siteVprot, "OV", xlim = c(-0.55, 1), ylim = c(-0.2, 1))
```

```{r}
pdac_kin_prot_actSite_Pcorr <- pdac_kin_prot_actSite_Pcorr[order(unlist(pdac_kin_prot_actSite_Pcorr))]
pdac_fit <- correlate_site_and_prot_with_ksea(
  pdac_kin_prot_actSite_Pcorr, pdac_phos_kins1, pdac_prot_kins,
  pdac_scores$KSEA, pdac_ksea_act_site_kins
)
pdac_act_siteVprot <- pdac_fit$table
pdac_act_site_ksea_corr <- pdac_fit$site_ksea
pdac_prot_ksea_corr <- pdac_fit$prot_ksea
plot(as.numeric(pdac_act_siteVprot[, 1]), as.numeric(pdac_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(pdac_act_siteVprot, "PDAC", xlim = c(-0.5, 1), ylim = c(-0.25, 1))
```

```{r}
ucec_kin_prot_actSite_Pcorr <- ucec_kin_prot_actSite_Pcorr[order(unlist(ucec_kin_prot_actSite_Pcorr))]
ucec_fit <- correlate_site_and_prot_with_ksea(
  ucec_kin_prot_actSite_Pcorr, ucec_phos_kins1, ucec_prot_kins,
  ucec_scores$KSEA, ucec_ksea_act_site_kins
)
ucec_act_siteVprot <- ucec_fit$table
ucec_act_site_ksea_corr <- ucec_fit$site_ksea
ucec_prot_ksea_corr <- ucec_fit$prot_ksea
plot(as.numeric(ucec_act_siteVprot[, 1]), as.numeric(ucec_act_siteVprot[, 6]))
abline(h = 0)
```

```{r}
plot_site_vs_prot(ucec_act_siteVprot, "UCEC", xlim = c(-0.4, 1), ylim = c(-0.2, 1))
```

```{r}
# Collapse the two significance flags of a "2" table into one fill colour.
#
# Negative correlations are first reset to "white" in V4/V5, so only positive
# significant correlations stay flagged. The new `color` column is then
#   "purple" - site (V4 == "red") and protein (V5 == "blue") both flagged,
#   "blue"   - only the protein flagged,
#   otherwise the V4 value ("red" or "white").
# Vectorised replacement for the former per-row loops, which failed on an
# empty table because 1:nrow() iterates over c(1, 0).
assign_corr_color <- function(tbl) {
  tbl$V4[which(tbl$`act-site_ksea_corr` < 0)] <- "white"
  tbl$V5[which(tbl$prot_ksea_corr < 0)] <- "white"
  tbl$color <- tbl$V4
  prot_flagged <- tbl$V5 == "blue"
  tbl$color[prot_flagged] <- ifelse(tbl$V4[prot_flagged] == "red", "purple", "blue")
  tbl
}

brca_act_siteVprot2 <- assign_corr_color(brca_act_siteVprot2)
ccrcc_act_siteVprot2 <- assign_corr_color(ccrcc_act_siteVprot2)
coad_act_siteVprot2 <- assign_corr_color(coad_act_siteVprot2)
gbm_act_siteVprot2 <- assign_corr_color(gbm_act_siteVprot2)
hnscc_act_siteVprot2 <- assign_corr_color(hnscc_act_siteVprot2)
lscc_act_siteVprot2 <- assign_corr_color(lscc_act_siteVprot2)
luad_act_siteVprot2 <- assign_corr_color(luad_act_siteVprot2)
ov_act_siteVprot2 <- assign_corr_color(ov_act_siteVprot2)
pdac_act_siteVprot2 <- assign_corr_color(pdac_act_siteVprot2)
ucec_act_siteVprot2 <- assign_corr_color(ucec_act_siteVprot2)
```

```{r}
# Concatenate one column (by name or position) across a list of tables.
pool_column <- function(tbls, col) {
  unlist(lapply(tbls, function(tbl) tbl[[col]]), use.names = FALSE)
}

all_cohort_tbls2 <- list(
  brca_act_siteVprot2, ccrcc_act_siteVprot2, coad_act_siteVprot2,
  gbm_act_siteVprot2, hnscc_act_siteVprot2, lscc_act_siteVprot2,
  luad_act_siteVprot2, ov_act_siteVprot2, pdac_act_siteVprot2,
  ucec_act_siteVprot2
)
# Column 1 is site-prot_corr, column 6 is diff (site/KSEA minus protein/KSEA).
plot(as.numeric(pool_column(all_cohort_tbls2, 1)),
     as.numeric(pool_column(all_cohort_tbls2, 6)),
     bg = pool_column(all_cohort_tbls2, "color"), pch = 21,
     xlab = "correlation of activating site with host protein",
     ylab = "difference in correlation with KSEA (activating site - protein)",
     ylim = c(-1, 1))
abline(h = 0)
```

```{r}
# Concatenate one column (by name or position) across a list of tables.
pool_column <- function(tbls, col) {
  unlist(lapply(tbls, function(tbl) tbl[[col]]), use.names = FALSE)
}

# The seven cohorts pooled in the remaining comparisons (COAD, OV and PDAC are
# left out here, as in the original calls).
seven_cohort_tbls2 <- list(
  brca_act_siteVprot2, ccrcc_act_siteVprot2, gbm_act_siteVprot2,
  hnscc_act_siteVprot2, lscc_act_siteVprot2, luad_act_siteVprot2,
  ucec_act_siteVprot2
)
# Column 1 is site-prot_corr, column 6 is diff (site/KSEA minus protein/KSEA).
plot(as.numeric(pool_column(seven_cohort_tbls2, 1)),
     as.numeric(pool_column(seven_cohort_tbls2, 6)),
     bg = pool_column(seven_cohort_tbls2, "color"), pch = 21,
     xlab = "correlation of activating site with host protein",
     ylab = "difference in correlation with KSEA (activating site - protein)")
abline(h = 0)
```

```{r}
pooled_site_prot_corr <- pool_column(seven_cohort_tbls2, "site-prot_corr")
pooled_site_ksea_corr <- pool_column(seven_cohort_tbls2, "act-site_ksea_corr")
pooled_prot_ksea_corr <- pool_column(seven_cohort_tbls2, "prot_ksea_corr")

plot(pooled_site_ksea_corr, pooled_prot_ksea_corr)

# Paired, one-tailed test: is the site/KSEA correlation larger than the
# protein/KSEA correlation? Run once (the original ran the identical call
# twice) and reuse the result for the plot title, so the reported p-value
# cannot drift away from the data as a hard-coded title would.
all_sites_test <- wilcox.test(pooled_site_ksea_corr, pooled_prot_ksea_corr,
                              alternative = "greater", paired = TRUE)
boxplot(pooled_site_ksea_corr, pooled_prot_ksea_corr,
        names = c("activating site-KSEA corr", "protein-KSEA corr"),
        main = paste0("Wilcoxon paired, one-tailed p=",
                      format(signif(all_sites_test$p.value, 2))))
all_sites_test
```

```{r}
# Same comparison restricted to sites that track their host protein only
# weakly (site-prot_corr <= 0.2).
weakly_coupled <- pooled_site_prot_corr <= 0.2
weak_sites_test <- wilcox.test(pooled_site_ksea_corr[weakly_coupled],
                               pooled_prot_ksea_corr[weakly_coupled],
                               alternative = "greater", paired = TRUE)
weak_sites_test
boxplot(pooled_site_ksea_corr[weakly_coupled],
        pooled_prot_ksea_corr[weakly_coupled],
        names = c("activating site-KSEA corr", "protein-KSEA corr"),
        main = paste0("Wilcoxon paired, one-tailed p=",
                      format(signif(weak_sites_test$p.value, 2))))
```

```{r}
# Print the colour counts of a pooled table within successive site-prot_corr
# bins: <= 0, (0, 0.2], (0.2, 0.4], (0.4, 0.6] and > 0.6.
tabulate_color_by_corr_bin <- function(tbl) {
  r <- tbl$`site-prot_corr`
  bins <- list(
    r <= 0,
    r <= 0.2 & r > 0,
    r <= 0.4 & r > 0.2,
    r <= 0.6 & r > 0.4,
    r > 0.6
  )
  for (in_bin in bins) {
    print(table(tbl$color[in_bin]))
  }
  invisible(NULL)
}

# NOTE(review): the `color` column is only added to the *_act_siteVprot2
# tables in this part of the notebook; confirm whether the tables without the
# "2" suffix used below carry it, or whether the "2" tables were intended.
all_act_siteVprot2_corr <- rbind(
  brca_act_siteVprot, ccrcc_act_siteVprot, coad_act_siteVprot,
  gbm_act_siteVprot, hnscc_act_siteVprot, lscc_act_siteVprot,
  luad_act_siteVprot, ov_act_siteVprot, pdac_act_siteVprot,
  ucec_act_siteVprot
)
table(c(brca_act_siteVprot$color, ccrcc_act_siteVprot$color,
        coad_act_siteVprot$color, gbm_act_siteVprot$color,
        hnscc_act_siteVprot$color, lscc_act_siteVprot$color,
        luad_act_siteVprot$color, ov_act_siteVprot$color,
        pdac_act_siteVprot$color, ucec_act_siteVprot$color))
# Tabulate the table built just above; the original read
# `all_act_siteVprot_corr`, which is only created in the next chunk.
tabulate_color_by_corr_bin(all_act_siteVprot2_corr)
```


```{r}
all_act_siteVprot_corr <- rbind(
  brca_act_siteVprot, ccrcc_act_siteVprot, gbm_act_siteVprot,
  hnscc_act_siteVprot, lscc_act_siteVprot, luad_act_siteVprot,
  ucec_act_siteVprot
)
table(c(brca_act_siteVprot$color, ccrcc_act_siteVprot$color,
        gbm_act_siteVprot$color, hnscc_act_siteVprot$color,
        lscc_act_siteVprot$color, luad_act_siteVprot$color,
        ucec_act_siteVprot$color))
tabulate_color_by_corr_bin(all_act_siteVprot_corr)
```
\ No newline at end of file diff
--git a/workflow/scripts/tumor_based_benchmark/outlieR.R b/workflow/scripts/tumor_based_benchmark/outlieR.R
new file mode 100644
index 0000000..770f393
--- /dev/null
+++ b/workflow/scripts/tumor_based_benchmark/outlieR.R
@@ -0,0 +1,101 @@
+#' Z-score each feature in the test samples against a reference distribution.
+#'
+#' For every row of `input_df` the centre is estimated from `ref_samples` and
+#' the spread from the scaling samples (the reference samples unless
+#' `scaling_sample_set1` is supplied). Rows with fewer than three non-missing
+#' reference values are dropped before scoring.
+#'
+#' @param input_df Data frame with features in rows and samples in columns.
+#' @param ref_samples,test_samples Column names of the reference and test
+#'   samples; names that are not columns of `input_df` are ignored.
+#' @param scaling_sample_set1,scaling_sample_set2 Optional column names used
+#'   to estimate the spread; a single `NA` (default) means "not supplied".
+#'   With both sets and `z_method = "medMAD"`, the absolute deviations of each
+#'   set around its own median are pooled.
+#' @param mfg Optional row name; when given, only that row is analysed.
+#' @param genesAsRownames,gene_column When `genesAsRownames` is `FALSE`, row
+#'   names are first taken from column `gene_column`.
+#' @param z_thresh Z-score magnitude beyond which a value counts as an outlier.
+#' @param z_method `"normal"` (mean and SD) or `"medMAD"` (median and MAD).
+#' @param scaling_factor Multiplier applied to the MAD (`"medMAD"` only).
+#' @return A data frame with one Z-score column per test sample, a
+#'   `normal_distribution` column (`"normal"` only: 1 when the Shapiro-Wilk
+#'   p-value of the reference values is >= 0.05) and the per-row counts
+#'   `Number_of_high_outliers` and `Number_of_low_outliers`. Returns `NA` with
+#'   a warning when `mfg` has fewer than three reference values.
+outlieR <- function(input_df, ref_samples, test_samples, scaling_sample_set1=NA, scaling_sample_set2=NA, mfg=NA, genesAsRownames=TRUE, gene_column=NULL, z_thresh=2.3265, z_method="normal", scaling_factor=1){
+  # an optional argument counts as "not supplied" only when it is a single NA;
+  # is.na() on a vector of sample names is not a valid if() condition
+  is_unset <- function(x) length(x) == 1L && is.na(x)
+
+  if(length(z_method) != 1 || !z_method %in% c("normal", "medMAD")){
+    stop("z_method must be set to valid method: normal(default), medMAD")
+  }
+  if(genesAsRownames==FALSE){
+    rownames(input_df) <- input_df[ , gene_column]
+  }
+  ref_samples <- intersect(ref_samples, colnames(input_df))
+  if(!is_unset(mfg)){
+    if(sum(!is.na(input_df[mfg, ref_samples])) > 2){
+      input_df <- input_df[mfg, , drop=FALSE]
+    } else {
+      warning('not enough reference data for mfg')
+      return(NA)
+    }
+  }
+  test_samples <- intersect(test_samples, colnames(input_df))
+
+  # pick the samples that define the spread of each feature
+  if(is_unset(scaling_sample_set1)){
+    scaling_samples <- ref_samples
+    scaling_sets <- 1
+  } else if(is_unset(scaling_sample_set2)){
+    scaling_samples <- intersect(scaling_sample_set1, colnames(input_df))
+    scaling_sets <- 1
+  } else {
+    scaling_sample_set1 <- intersect(scaling_sample_set1, colnames(input_df))
+    scaling_sample_set2 <- intersect(scaling_sample_set2, colnames(input_df))
+    scaling_samples <- c(scaling_sample_set1, scaling_sample_set2)
+    scaling_sets <- 2
+  }
+
+  # need at least 3 reference values to define a distribution; drop the rest
+  enough_ref <- rowSums(!is.na(input_df[ , ref_samples, drop=FALSE])) > 2
+  input_df <- input_df[enough_ref, , drop=FALSE]
+  ref_mat <- as.matrix(input_df[ , ref_samples, drop=FALSE])
+  test_mat <- as.matrix(input_df[ , test_samples, drop=FALSE])
+  scaling_mat <- as.matrix(input_df[ , scaling_samples, drop=FALSE])
+
+  if(z_method=="normal"){
+    z_mat <- (test_mat - rowMeans(ref_mat, na.rm = TRUE))/apply(scaling_mat, 1, sd, na.rm = TRUE)
+    # Shapiro-Wilk on the reference values only (not on the derived mean/SD);
+    # rows the test cannot handle (e.g. all values identical) get NA
+    normality_pval <- vapply(seq_len(nrow(ref_mat)), function(i){
+      tryCatch(shapiro.test(ref_mat[i, ])$p.value, error = function(e) NA_real_)
+    }, numeric(1))
+  } else {
+    if(scaling_sets==1){
+      # NOTE(review): stats::mad() already includes its default constant
+      # (1.4826); scaling_factor is applied on top of that - confirm intent
+      spread <- apply(scaling_mat, 1, mad, na.rm = TRUE)
+    } else {
+      # pooled MAD: median of the absolute deviations of both sets
+      abs_devs <- lapply(list(scaling_sample_set1, scaling_sample_set2), function(set){
+        set_mat <- as.matrix(input_df[ , set, drop=FALSE])
+        abs(set_mat - apply(set_mat, 1, median, na.rm = TRUE))
+      })
+      spread <- apply(do.call(cbind, abs_devs), 1, median, na.rm = TRUE)
+    }
+    z_mat <- (test_mat - apply(ref_mat, 1, median, na.rm = TRUE))/(spread * scaling_factor)
+  }
+
+  outlier_Zscores <- as.data.frame(z_mat)
+  if(z_method=="normal"){
+    outlier_Zscores$normal_distribution <- as.numeric(normality_pval >= 0.05)
+  }
+  # number of test samples with Z-scores above z_thresh / below -z_thresh
+  outlier_Zscores$Number_of_high_outliers <- rowSums(z_mat > z_thresh, na.rm = TRUE)
+  outlier_Zscores$Number_of_low_outliers <- rowSums(z_mat < -z_thresh, na.rm = TRUE)
+
+  return(outlier_Zscores)
+}
diff --git a/workflow/scripts/tumor_based_benchmark/outlieR_plus.R b/workflow/scripts/tumor_based_benchmark/outlieR_plus.R
new file mode 100644
index 0000000..dd2260d
--- /dev/null
+++ b/workflow/scripts/tumor_based_benchmark/outlieR_plus.R
@@ -0,0 +1,155 @@
+#' Outlier-based differential-expression scores for testing vs reference samples.
+#'
+#' Computes per-sample Z-scores with `outlieR()`, calls high (+1) and low (-1)
+#' outliers with the rule of the chosen `type`, and summarises the calls into
+#' one positive and one negative score per feature.
+#'
+#' @param input_df Data frame with features in rows and samples in columns.
+#' @param reference_samples,testing_samples Column names of the two groups;
+#'   names that are not columns of `input_df` are ignored.
+#' @param z_thresh Outlier threshold on the Z-scores; `"default"` resolves to
+#'   2.3265 for outlieR, Tstat and COPA and to 2.3265 * 1.48 otherwise.
+#' @param z_method `"normal"` or `"medMAD"`; `NA` resolves to `"normal"` for
+#'   Tstat and `"medMAD"` for every other type.
+#' @param type One of `"outlieR"`, `"COPA"`, `"OS"`, `"ORA"`, `"Tstat"`.
+#' @param scaling_factor MAD multiplier handed to `outlieR()` for the outlieR
+#'   and Tstat types; forced to 1 when `z_method` is `"normal"`.
+#' @param quant Upper quantile of the testing-sample Z-scores used by COPA
+#'   (the lower cut-off is `1 - quant`).
+#' @param genesAsRownames,gene_column When `genesAsRownames` is `FALSE`, row
+#'   names are first taken from column `gene_column`.
+#' @param ... Further arguments passed on to `outlieR()`.
+#' @return A list with `Zscores` (output of `outlieR()`), `outliers`
+#'   (+1 / -1 / 0 calls per feature and sample) and `DEscores` (columns
+#'   `PositiveOutlierScores`, `NegativeOutlierScores`, `best`).
+outlieR_plus <- function(input_df, reference_samples, testing_samples, z_thresh="default", z_method=NA, type="outlieR", scaling_factor=1.48, quant=0.95, genesAsRownames=TRUE, gene_column=NULL, ...){
+  if(!type %in% c("outlieR","COPA","OS","ORA","Tstat")){
+    stop("type must be set to valid method: outlieR(default), COPA, OS, ORA, Tstat")
+  }
+  reference_samples <- intersect(reference_samples, colnames(input_df))
+  testing_samples <- intersect(testing_samples, colnames(input_df))
+  all_samples <- union(reference_samples, testing_samples)
+  if(genesAsRownames==FALSE){
+    rownames(input_df) <- input_df[ , gene_column]
+  }
+  input_df <- input_df[ , all_samples, drop=FALSE]
+  if(is.na(z_method)){
+    z_method <- if(type=="Tstat") "normal" else "medMAD"
+  }
+  if(z_method=="normal"){
+    scaling_factor <- 1
+  }
+  if(identical(z_thresh, "default")){
+    z_thresh <- if(type %in% c("outlieR","Tstat","COPA")) 2.3265 else 2.3265 * 1.48
+  }
+
+  # +1 where a value exceeds its row's upper cut-off, -1 below the lower one
+  flag_outliers <- function(values, upper, lower){
+    (values > upper) - (values < lower) + 0
+  }
+  # per-row Tukey-style fences: Q3 + a * IQR and Q1 - a * IQR
+  iqr_fences <- function(values, a){
+    q1 <- apply(values, 1, quantile, probs = 0.25, na.rm = TRUE)
+    q3 <- apply(values, 1, quantile, probs = 0.75, na.rm = TRUE)
+    list(upper = q3 + a * (q3 - q1), lower = q1 - a * (q3 - q1))
+  }
+
+  # first calculate Z-scores; only the outlieR type scores against the
+  # reference samples alone, every other type centres on all samples
+  if(type=="outlieR"){
+    zscores <- outlieR(input_df = input_df, ref_samples = reference_samples, test_samples = all_samples, z_method = z_method, z_thresh = z_thresh, scaling_factor = scaling_factor, ...)
+  } else if(type=="Tstat"){
+    zscores <- outlieR(input_df = input_df, ref_samples = all_samples, test_samples = all_samples, z_method = z_method, z_thresh = z_thresh, scaling_factor = scaling_factor, ...)
+  } else if(type=="OS"){
+    zscores <- outlieR(input_df = input_df, ref_samples = all_samples, scaling_sample_set1 = reference_samples, test_samples = all_samples, z_method = z_method, z_thresh = z_thresh, ...)
+  } else {
+    zscores <- outlieR(input_df = input_df, ref_samples = all_samples, test_samples = all_samples, z_method = z_method, z_thresh = z_thresh, ...)
+  }
+  # outlieR() drops features with too few reference values, so everything
+  # below is keyed on the rows it actually scored
+  scored <- rownames(zscores)
+  z_all <- as.matrix(zscores[ , all_samples, drop=FALSE])
+  z_test <- z_all[ , testing_samples, drop=FALSE]
+
+  # now identify outliers and generate outlier matrix (1 high, -1 low, 0 not)
+  if(type=="outlieR"|type=="Tstat"){
+    outliers <- flag_outliers(z_all, z_thresh, -z_thresh)
+  } else if(type=="COPA"){
+    copa_upper <- apply(z_test, 1, quantile, probs = quant, na.rm = TRUE)
+    copa_lower <- apply(z_test, 1, quantile, probs = 1 - quant, na.rm = TRUE)
+    outliers <- as.data.frame(flag_outliers(z_all, copa_upper, copa_lower))
+  } else if(type=="OS"){
+    fences <- iqr_fences(z_all, a = 1)
+    outliers <- as.data.frame(flag_outliers(z_all, fences$upper, fences$lower))
+  } else {
+    # ORA calls outliers on the measured values rather than on the Z-scores
+    raw_all <- as.matrix(input_df)
+    fences <- iqr_fences(raw_all, a = 1.5)
+    outliers <- as.data.frame(flag_outliers(raw_all, fences$upper, fences$lower))
+  }
+  out_all <- as.matrix(outliers)
+  out_test <- out_all[ , testing_samples, drop=FALSE]
+  out_ref <- out_all[ , reference_samples, drop=FALSE]
+
+  if(type=="outlieR"){
+    # outlier frequency in the testing samples minus that in the reference samples
+    outlier_rate <- function(calls, value){
+      rowSums(calls==value, na.rm = TRUE)/rowSums(!is.na(calls))
+    }
+    positive <- outlier_rate(out_test, 1) - outlier_rate(out_ref, 1)
+    negative <- outlier_rate(out_test, -1) - outlier_rate(out_ref, -1)
+  } else if(type=="OS"){
+    # sum of the outlying testing-sample Z-scores per called testing sample
+    n_called <- rowSums(!is.na(out_test))
+    positive <- rowSums(z_test * (out_test==1), na.rm = TRUE)/n_called
+    negative <- rowSums(z_test * (out_test==(-1)), na.rm = TRUE)/n_called
+  } else if(type=="Tstat"){
+    x_ref <- as.matrix(input_df[scored, reference_samples, drop=FALSE])
+    x_test <- as.matrix(input_df[scored, testing_samples, drop=FALSE])
+    if(z_method=="normal"){
+      sum_sq <- function(x) rowSums((x - rowMeans(x, na.rm = TRUE))^2, na.rm = TRUE)
+      Ns <- rowSums(!is.na(input_df[scored, , drop=FALSE])) - 2
+      positive <- (rowMeans(x_test, na.rm = TRUE) - rowMeans(x_ref, na.rm = TRUE))/sqrt((sum_sq(x_ref) + sum_sq(x_test))/(Ns + 0.0000000001) + 0.0000000001)
+    } else {
+      ref_med <- apply(x_ref, 1, median, na.rm = TRUE)
+      test_med <- apply(x_test, 1, median, na.rm = TRUE)
+      mads <- apply(cbind(abs(x_ref - ref_med), abs(x_test - test_med)), 1, median, na.rm = TRUE)
+      positive <- (test_med - ref_med)/(mads + 0.0000000001)
+    }
+    negative <- positive
+  } else if(type=="ORA"){
+    # -log10 hypergeometric p-value for the enrichment of outliers among the testing samples
+    ora_score <- function(value){
+      n_test <- rowSums(out_test==value, na.rm = TRUE)
+      n_all <- rowSums(out_all==value, na.rm = TRUE)
+      n_test[n_test==0] <- 1
+      score <- -log10(phyper(n_test - 1, length(testing_samples), length(all_samples) - length(testing_samples), n_all, lower.tail = FALSE))
+      score[n_all==0] <- 0
+      score
+    }
+    positive <- ora_score(1)
+    negative <- ora_score(-1)
+  } else {
+    # COPA: the quantile cut-offs of the testing-sample Z-scores are the scores
+    positive <- copa_upper
+    negative <- copa_lower
+  }
+  DEscores <- data.frame(PositiveOutlierScores = unname(positive), NegativeOutlierScores = unname(negative), row.names = rownames(out_all))
+  #direction indicates whether the DE score of a method is positive ("pos") or negative ("neg") for negative outliers
+  direction <- if(type=="outlieR"|type=="ORA") "pos" else "neg"
+  best <- DEscores$PositiveOutlierScores
+  if(direction=="neg"){
+    #use the negative outlier score when its magnitude exceeds the positive outlier score
+    use_neg <- which(!is.na(best) & (-DEscores$NegativeOutlierScores > DEscores$PositiveOutlierScores))
+    best[use_neg] <- DEscores$NegativeOutlierScores[use_neg]
+  } else {
+    #use minus the negative outlier score when that score exceeds the positive outlier score
+    use_neg <- which(!is.na(best) & (DEscores$NegativeOutlierScores > DEscores$PositiveOutlierScores))
+    best[use_neg] <- -DEscores$NegativeOutlierScores[use_neg]
+    #both scores negative (outliers more frequent in the reference group) >> set best score to 0
+    best[which(!is.na(best) & (DEscores$NegativeOutlierScores < 0) & (DEscores$PositiveOutlierScores < 0))] <- 0
+  }
+  DEscores$best <- best
+
+  return(list(Zscores = zscores, outliers = outliers, DEscores = DEscores))
+}
diff --git a/workflow/scripts/tumor_based_benchmark/plot_outlieR.R b/workflow/scripts/tumor_based_benchmark/plot_outlieR.R
new file mode 100644
index 0000000..b383802
--- /dev/null
+++ b/workflow/scripts/tumor_based_benchmark/plot_outlieR.R
@@ -0,0 +1,71 @@
+#' Histogram of one feature in reference vs test samples with Z-score guides.
+#'
+#' Draws the reference-sample histogram of `mfg`, overlays a normal curve
+#' with the reference mean and SD, adds the test-sample histogram and marks
+#' the positions of the Z-scores spanned by the test samples.
+#'
+#' @param outlieR_Zscores Z-score table, or a list whose first element is that
+#'   table when `outlieRplus` is `TRUE`.
+#' @param outlieRplus Whether `outlieR_Zscores` is such a list.
+#' @param input_df Data frame with features in rows and samples in columns.
+#' @param genesAsRownames,gene_column When `genesAsRownames` is `FALSE`, row
+#'   names are first taken from column `gene_column`.
+#' @param ref_samples,test_samples Column names of the two sample groups.
+#' @param mfg Row name of the feature to plot.
+#' @param bin_interval Width of the histogram bins.
+#' @param Z_interval Spacing of the Z-score guide lines.
+#' @param scale_fit Multiplier for the height of the normal curve.
+#' @param font_size Character expansion for axes, labels and annotations.
+#' @param ylimit Upper y-axis limit, or `"default"` to let `hist()` choose.
+#' @param test_col,ref_col Fill colours, or `"default"` for the built-in
+#'   semi-transparent colours.
+#' @return Called for its plot; returns `NULL` invisibly.
+plotOutlieR <- function(outlieR_Zscores, outlieRplus=FALSE, input_df, genesAsRownames=TRUE, gene_column=NULL, ref_samples, test_samples, mfg, bin_interval=1, Z_interval=1, scale_fit=1, font_size=1, ylimit="default", test_col="default", ref_col="default"){
+  if(genesAsRownames==FALSE){
+    rownames(input_df) <- input_df[ , gene_column]
+  }
+  if(identical(test_col, "default")){
+    test_col <- rgb(0,139,0,120, maxColorValue = 255)
+  }
+  if(identical(ref_col, "default")){
+    ref_col <- rgb(210,105,30,160, maxColorValue = 255)
+  }
+  outlieR_df <- if(outlieRplus==FALSE) outlieR_Zscores else outlieR_Zscores[[1]]
+  z_values <- as.numeric(unlist(outlieR_df[mfg, intersect(colnames(outlieR_df), test_samples), drop=FALSE]))
+  z_values <- z_values[is.finite(z_values)]
+  ref_samples <- intersect(ref_samples, colnames(input_df))
+  test_samples <- intersect(test_samples, colnames(input_df))
+  ref_values <- as.numeric(unlist(input_df[mfg, ref_samples, drop=FALSE]))
+  test_values <- as.numeric(unlist(input_df[mfg, test_samples, drop=FALSE]))
+
+  distr_min <- min(c(test_values, ref_values), na.rm = TRUE)
+  distr_max <- max(c(test_values, ref_values), na.rm = TRUE)
+  distr_diff <- distr_max - distr_min
+  x_range <- c(distr_min - 0.2*distr_diff, distr_max + 0.2*distr_diff)
+  # bins start at floor(min) and always reach ceiling(max), also when
+  # bin_interval does not divide that range evenly
+  n_bins <- max(1, ceiling((ceiling(distr_max) - floor(distr_min))/bin_interval))
+  breaks <- floor(distr_min) + bin_interval * (0:n_bins)
+  # ylim = NULL leaves the choice of y-axis limits to hist()
+  y_range <- if(identical(ylimit, "default")) NULL else c(0, ylimit)
+
+  x <- hist(ref_values, col=ref_col, xlim = x_range, xlab = mfg, breaks = breaks, main = "", cex.axis=font_size, cex.lab=font_size, ylim = y_range)
+  # normal curve with the reference mean/SD, scaled from density to counts
+  ref_mean <- mean(ref_values, na.rm = TRUE)
+  ref_sd <- sd(ref_values, na.rm = TRUE)
+  xfit <- seq(x_range[1], x_range[2], length.out = 50)
+  yfit <- scale_fit * dnorm(xfit, mean = ref_mean, sd = ref_sd) * bin_interval * sum(!is.na(ref_values))
+  lines(xfit, yfit, col="black", lwd=2)
+  hist(test_values, col=test_col, xlim = x_range, breaks=breaks, add=TRUE, cex.axis=font_size, cex.lab=font_size, ylim = y_range)
+
+  # guide lines at the reference-scale positions of the test-sample Z-scores
+  # NOTE(review): positions use the reference mean/SD, so they presumably only
+  # line up with Z-scores computed by the "normal" method - confirm
+  if(length(z_values) > 0){
+    Z_set <- seq(round(min(z_values)), round(max(z_values)), by=Z_interval)
+    abline(v=ref_mean+ref_sd*Z_set, col="red", lwd=2.5)
+    text(ref_mean+ref_sd*Z_set-0.2*ref_sd, max(x$counts)-0.3, as.character(Z_set), col = "red", cex = font_size)
+  }
+  text(distr_min - 0.15*distr_diff, max(x$counts)-0.1, "Z-score", col="red", cex = 1.2 * font_size)
+  invisible(NULL)
+}