From 3a7dee4e77de04f74d132d51b04fabb85773ecc5 Mon Sep 17 00:00:00 2001 From: Alex Bartlett Date: Mon, 22 Apr 2024 17:32:58 -0400 Subject: [PATCH 1/3] use test data, start adding emma code --- .../templates/rnaseq/skeleton/params_de.R | 8 +-- .../templates/rnaseq/skeleton/params_qc.R | 11 ++++ .../rnaseq/skeleton/reports/DEG/DEG.Rmd | 61 +++++++++++++------ .../skeleton/reports/DEG/run_markdown.R | 18 ++++-- 4 files changed, 69 insertions(+), 29 deletions(-) create mode 100644 inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R b/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R index 591a8cc..8b77c89 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R @@ -8,8 +8,8 @@ analyst = 'person in the core' # project params root = "../" date = "YYYYMMDD" -column = "treatment" -subset_column = 'cell' -metadata_fn = "../meta/samplesheet.csv" -counts_fn = '../data/tximport-counts.csv' +column = "sample_type" +subset_column = NA +metadata_fn = "/n/data1/cores/bcbio/platform/test-data/rnaseq/GSE251845_colon_n3_tumor_normal/final/2024-04-18_samples/coldata.csv" +counts_fn = '/n/data1/cores/bcbio/platform/test-data/rnaseq/GSE251845_colon_n3_tumor_normal/final/2024-04-18_samples/counts/tximport-counts.csv' basedir <- 'reports' diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R b/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R new file mode 100644 index 0000000..716e0b3 --- /dev/null +++ b/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R @@ -0,0 +1,11 @@ +# info params +project = "Interon_rnaseq_hbc04916" +PI = 'Donggi Paik, Ph.D.' +experiment = '8 human cell lines from multiple origins. Treatment are 9 conditions including untreated.' +aim = 'Determine treatment effect within cell line and in combination with TNF' +analyst = 'Lorena Pantano, PhD' + + +metadata_fn="../meta/hbc04916_metadata_bcbio.csv" +se_object="../counts/bcbio-se.rds" +cols_to_show=c("cell","treatment") diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd index 6139615..6d6a998 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd +++ b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd @@ -17,17 +17,17 @@ output: editor_options: chunk_output_type: console params: - numerator: foo - denominator: bar - subset_value: nah - params_file: params_de.R + numerator: tumor + denominator: normal + subset_value: NA + params_file: ./inst/rmarkdown/templates/rnaseq/skeleton/params_de.R --- -```{r echo = F} +```{r load_params, echo = F} source(params$params_file) ``` -```{r, cache = FALSE, message = FALSE, warning=FALSE, echo=FALSE,} +```{r load_libraries, cache = FALSE, message = FALSE, warning=FALSE, echo=FALSE,} library(rtracklayer) library(DESeq2) library(tidyverse) @@ -39,6 +39,7 @@ library(msigdbr) library(fgsea) library(org.Hs.eg.db) library(knitr) +library(EnhancedVolcano) ggplot2::theme_set(theme_light(base_size = 14)) opts_chunk[["set"]]( @@ -55,7 +56,7 @@ opts_chunk[["set"]]( fig.height = 4) ``` -```{r sanitize-datatable} +```{r sanitize_datatable} sanitize_datatable = function(df, ...) { # remove dashes which cause wrapping DT::datatable(df, ..., rownames=gsub("-", "_", rownames(df)), @@ -72,7 +73,7 @@ sanitize_datatable = function(df, ...) { - Aim: `r aim` - Comparison: `r paste0(params$subset_value, ': ', params$numerator, ' vs. ', params$denominator)` -```{r create-filenames} +```{r create_filenames} filenames = str_interp("${params$subset_value}_${params$numerator}_vs_${params$denominator}") contrasts = c(column,params$numerator,params$denominator) @@ -88,12 +89,14 @@ name_pathways_fn=file.path(root, ``` -```{r read-counts-data} -coldata=read.csv(metadata_fn, row.names = 1) +```{r read_counts_data} +coldata=read.csv(metadata_fn) stopifnot(column %in% names(coldata)) # use only some samples, by default use all -coldata <- coldata[coldata[[paste(subset_column)]] == params$subset_value, ] +if (!is.na(subset_column)){ + coldata <- coldata[coldata[[paste(subset_column)]] == params$subset_value, ] +} coldata <- coldata[coldata[[paste(column)]] %in% c(params$numerator, params$denominator), ] rownames(coldata) <- coldata$description coldata[[column]] = relevel(as.factor(coldata[[column]]), params$denominator) @@ -115,7 +118,7 @@ rdata = AnnotationDbi::select(org.Hs.eg.db, rownames(counts), 'SYMBOL', 'ENSEMBL When performing differential expression analysis, it is important to ensure that any detected differences are truly a result of the experimental comparison being made and not any additional variability in the data. -```{r before-RUV} +```{r before_RUV} dds_before <- DESeqDataSetFromMatrix(counts, coldata, design = ~1) @@ -126,7 +129,7 @@ degPCA(assay(vsd_before), colData(vsd_before), ``` -```{r do-RUV} +```{r do_RUV} design <- coldata[[column]] names(design) <- coldata$name diffs <- makeGroups(design) @@ -144,7 +147,7 @@ formula <- as.formula(paste0("~ ", ) ``` -```{r after-RUV} +```{r after_RUV} ## Check if sample name matches stopifnot(all(names(counts) == rownames(new_cdata))) @@ -158,11 +161,21 @@ degPCA(ruvset$normalizedCounts, new_cdata, # Differential Expression -Because it is difficult to accurately detect and quantify the expression of lowly expressed genes, differences in their expression between treatment conditions can be unduly exaggerated. We correct for this so that gene LFC is not dependent overall on basal gene expression level. +Differential gene expression analysis of count data was performed using the Bioconductor R package, DESeq2, which fits the count data to a negative binomial model. + +Before fitting the model, we often look at a metric called dispersion, which is a measure for variance which also takes into consideration mean expression. A dispersion value is estimated for each individual gene, then 'shrunken' to a more accurate value based on expected variation for a typical gene exhibiting that level of expression. Finally, the shrunken dispersion value is used in the final GLM fit. + +We use the below dispersion plot, which should show an inverse relationship between dispersion and mean expression, to get an idea of whether our data is a good fit for the model. ```{r DE} de <- DESeq(dds_after) +DESeq2::plotDispEsts(de) +``` + +Because it is difficult to accurately detect and quantify the expression of lowly expressed genes, differences in their expression between treatment conditions can be unduly exaggerated after the model is fit. We correct for this so that gene LFC is not dependent overall on basal gene expression level. + +```{r lfc_shrink} # resultsNames(de) # check the order is right resLFC = results(de, contrast=contrasts) resLFCS <- lfcShrink(de, coef=resultsNames(de)[ncol(vars)+2], type="apeglm") @@ -180,14 +193,24 @@ show <- as.data.frame(res_mod[1:10, c("lfc", "padj", "gene_name")]) degMA(as.DEGSet(resLFC)) + ggtitle('Before LFC Shrinking') ``` -```{r} +```{r after_lfc_shrink} degMA(as.DEGSet(resLFCS), limit = 2) + ggtitle('After LFC Shrinking') ``` -This volcano plot shows the genes that are significantly up- and down-regulated as a result of the difference in treatment. -```{r} -degVolcano(res_mod[,c('lfc', 'padj')], plot_text = show) +This volcano plot shows the genes that are significantly up- and down-regulated as a result of the analysis comparison. The points highlighted in red are genes that have padj < 0.05 and a log2-fold change > 1. Points in blue have a padj < 0.05 and a log2-fold change < 1 and points in green have a padj > 0.05 and a log2-fold change > 2. Grey points are non-significant. The dashed lines correspond to the cutoff values of log2 foldchance and padj that we have chosen. + +```{r volcano_plot, fig.height=6} +# degVolcano(res_mod[,c('lfc', 'padj')], plot_text = show) +EnhancedVolcano(res_mod, + lab= res_mod$gene_name, + pCutoff = 1.345719e-03, + selectLab = c(res_sig$gene_name[1:15]), + FCcutoff = 0.5, + x = 'lfc', + y = 'padj', + title="Volcano Tumor vs. Normal", + subtitle = "", xlim=c(-5,5)) ``` diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R index 4902560..b877334 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R @@ -1,18 +1,24 @@ library(rmarkdown) -render_de <- function(subset_value, numerator, denominator){ - rmarkdown::render(input = "DEG.Rmd", - output_dir = "reports", +render_de <- function(numerator, denominator, subset_value = NA, + params_file = '../../params_de.R'){ + + rmarkdown::render(input = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd", + output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/", output_format = "html_document", - output_file = paste0('DE_', subset_value, '_', numerator, '_vs_', denominator, '.html'), + output_file = ifelse(!is.na(subset_value), + paste0('DE_', subset_value, '_', numerator, '_vs_', denominator, '.html'), + paste0('DE_', numerator, '_vs_', denominator, '.html') + ), clean = TRUE, envir = new.env(), params = list( subset_value = subset_value, numerator = numerator, - denominator = denominator + denominator = denominator, + params_file = params_file ) ) } -render_de('HDFn', "TNF", "untreated") +render_de("tumor", "normal") From 8c9b26b54b741d3b826733c90cbbb0a446a6b8ff Mon Sep 17 00:00:00 2001 From: Alex Bartlett Date: Tue, 23 Apr 2024 12:05:34 -0400 Subject: [PATCH 2/3] qc and de with new loc of test data --- .../templates/rnaseq/skeleton/params_de.R | 8 +-- .../templates/rnaseq/skeleton/params_qc.R | 15 ++--- .../rnaseq/skeleton/reports/DEG/DEG.Rmd | 53 ++++++++------- .../skeleton/reports/DEG/run_markdown.R | 2 +- .../rnaseq/skeleton/reports/QC/QC.Rmd | 64 ++++++++++--------- .../rnaseq/skeleton/reports/QC/run_markdown.R | 9 +++ .../rnaseq/skeleton/results/placeholder | 0 7 files changed, 83 insertions(+), 68 deletions(-) create mode 100644 inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/run_markdown.R create mode 100644 inst/rmarkdown/templates/rnaseq/skeleton/results/placeholder diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R b/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R index 8b77c89..c55baed 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R @@ -6,10 +6,10 @@ aim = 'short description' analyst = 'person in the core' # project params -root = "../" +root = "../.." date = "YYYYMMDD" column = "sample_type" subset_column = NA -metadata_fn = "/n/data1/cores/bcbio/platform/test-data/rnaseq/GSE251845_colon_n3_tumor_normal/final/2024-04-18_samples/coldata.csv" -counts_fn = '/n/data1/cores/bcbio/platform/test-data/rnaseq/GSE251845_colon_n3_tumor_normal/final/2024-04-18_samples/counts/tximport-counts.csv' -basedir <- 'reports' +metadata_fn = "/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/coldata.csv" +counts_fn = '/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/tximport-counts.csv' +basedir <- 'results' diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R b/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R index 716e0b3..894469a 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R @@ -1,11 +1,10 @@ # info params -project = "Interon_rnaseq_hbc04916" -PI = 'Donggi Paik, Ph.D.' -experiment = '8 human cell lines from multiple origins. Treatment are 9 conditions including untreated.' -aim = 'Determine treatment effect within cell line and in combination with TNF' -analyst = 'Lorena Pantano, PhD' +project = "name_hbcXXXXX" +PI = 'person name' +experiment = 'short description' +aim = 'short description' +analyst = 'person in the core' -metadata_fn="../meta/hbc04916_metadata_bcbio.csv" -se_object="../counts/bcbio-se.rds" -cols_to_show=c("cell","treatment") +metadata_fn="/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/coldata.csv" +se_object="/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/bcbio-se.rds" diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd index 6d6a998..79fbfa8 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd +++ b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd @@ -74,7 +74,13 @@ sanitize_datatable = function(df, ...) { - Comparison: `r paste0(params$subset_value, ': ', params$numerator, ' vs. ', params$denominator)` ```{r create_filenames} -filenames = str_interp("${params$subset_value}_${params$numerator}_vs_${params$denominator}") + +if (!is.na(subset_value)){ + filenames = str_interp("${params$subset_value}_${params$numerator}_vs_${params$denominator}") +} else { + filenames = str_interp("${params$numerator}_vs_${params$denominator}") +} + contrasts = c(column,params$numerator,params$denominator) name_expression_fn=file.path(root, @@ -182,7 +188,7 @@ resLFCS <- lfcShrink(de, coef=resultsNames(de)[ncol(vars)+2], type="apeglm") res <- as.data.frame(resLFCS) %>% rownames_to_column('gene_id') %>% left_join(rdata, by = 'gene_id') %>% - relocate(gene_name) %>% rename(lfc = log2FoldChange) %>% + relocate(gene_name) %>% dplyr::rename(lfc = log2FoldChange) %>% mutate(pi = abs(lfc) * -log10(padj)) %>% arrange(-pi) res_sig <- res %>% filter(padj < 0.05) %>% arrange(padj) @@ -228,15 +234,6 @@ universe=res %>% filter(!is.na(padj)) %>% pull(gene_id) mapping = AnnotationDbi::select(org.Hs.eg.db, universe, 'ENTREZID', 'ENSEMBL') -ranks_df <- res %>% - filter(padj < 0.01, !is.na(padj)) %>% # only if enough DE genes - arrange((lfc)) %>% - left_join(mapping, by = c("gene_id"="ENSEMBL")) %>% - rename(entrez_id=ENTREZID) %>% - filter(!is.na(entrez_id) & !is.na(padj)) -ranks <- ranks_df$lfc -names(ranks) <- ranks_df$entrez_id - all_in_life=list( msigdbr(species = "human", category = "H") %>% mutate(gs_subcat="Hallmark"), msigdbr(species = "human", category = "C2", subcategory = "CP:REACTOME"), @@ -286,19 +283,27 @@ pathways_ora_all %>% sanitize_datatable() ```{r write-files} counts_norm=ruvset$normalizedCounts %>% as.data.frame() %>% - rownames_to_column("gene_id") -write_csv(counts_norm %>% - mutate(subset = params$subset_value, - comparison = str_interp("${params$numerator}_vs_${params$denominator}")), - name_expression_fn) -write_csv(res %>% - mutate(subset = params$subset_value, - comparison = str_interp("${params$numerator}_vs_${params$denominator}")), - name_deg_fn) -write_csv(pathways_long %>% - mutate(subset = params$subset_value, - comparison = str_interp("${params$numerator}_vs_${params$denominator}")), - name_pathways_fn) + rownames_to_column("gene_id") %>% + mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}")) + +res_for_writing <- res %>% + mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}")) + +pathways_for_writing <- pathways_long %>% + mutate(comparison = str_interp("${params$numerator}_vs_${params$denominator}")) + +if (!is.na(subset_value)){ + counts_norm <- counts_norm %>% + mutate(subset = params$subset_value) + res_for_writing <- res_for_writing %>% + mutate(subset = params$subset_value) + pathways_for_writing <- pathways_for_writing %>% + mutate(subset = params$subset_value) +} + +write_csv(counts_norm, name_expression_fn) +write_csv(res_for_writing, name_deg_fn) +write_csv(pathways_for_writing, name_pathways_fn) ``` # R session diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R index b877334..0f8b74d 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/run_markdown.R @@ -4,7 +4,7 @@ render_de <- function(numerator, denominator, subset_value = NA, params_file = '../../params_de.R'){ rmarkdown::render(input = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/DEG.Rmd", - output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/", + output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/DEG/", output_format = "html_document", output_file = ifelse(!is.na(subset_value), paste0('DE_', subset_value, '_', numerator, '_vs_', denominator, '.html'), diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd b/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd index 1ded0c5..eae3c5f 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd +++ b/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd @@ -17,7 +17,7 @@ output: editor_options: chunk_output_type: console params: - params_file: code/params_qc.R + params_file: ./inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R --- @@ -34,6 +34,7 @@ library(ggrepel) library(pheatmap) library(RColorBrewer) library(DT) +library(pheatmap) ggplot2::theme_set(theme_light(base_size = 14)) opts_chunk[["set"]]( cache = FALSE, @@ -113,9 +114,10 @@ sanitize_datatable = function(df, ...) { # Samples and metadata ```{r load_metadata} -meta_df=read_csv(metadata_fn) +meta_df=read_csv(metadata_fn) %>% mutate(sample = tolower(description)) %>% + dplyr::select(-description) -ggplot(meta_df, aes(cell, fill=treatment)) + +ggplot(meta_df, aes(sample_type, fill = sample_type)) + geom_bar() + ylab("") + xlab("") + scale_fill_brewer(palette = "Set1") ``` @@ -126,13 +128,13 @@ se <- readRDS(se_object) #local metrics <- metadata(se)$metrics %>% - full_join(meta_df, by = c("sample" = "sample")) + full_join(meta_df , by = c("sample" = "sample")) meta_sm <- meta_df %>% as.data.frame() %>% column_to_rownames("sample") -meta_sm[,cols_to_show] %>% sanitize_datatable() +meta_sm %>% sanitize_datatable() ``` @@ -144,9 +146,9 @@ Here, we want to see consistency and a minimum of 20 million reads. ```{r plot_total_reads} metrics %>% - ggplot(aes(x = cell, - y = total_reads, - color = treatment )) + + ggplot(aes(x = sample_type, + y = total_reads, + color = sample_type)) + geom_point(alpha=0.5) + coord_flip() + scale_y_continuous(name = "million reads") + @@ -168,8 +170,9 @@ The genomic mapping rate represents the percentage of reads mapping to the refer ```{r plot_mapping_rate} metrics$mapped_reads_pct <- round(metrics$mapped_reads/metrics$total_reads*100,1) metrics %>% - ggplot(aes(x = cell, - y = mapped_reads_pct, color = treatment)) + + ggplot(aes(x = sample_type, + y = mapped_reads_pct, + color = sample_type)) + geom_point() + coord_flip() + scale_color_brewer(palette = "Set1") + @@ -193,8 +196,8 @@ genes_detected <- summarise(genes_detected, metrics <- metrics %>% left_join(genes_detected, by = c("sample" = "name")) -ggplot(metrics,aes(x = cell, - y = n_genes, color = treatment)) + +ggplot(metrics,aes(x = sample_type, + y = n_genes, color = sample_type)) + geom_point() + coord_flip() + scale_color_brewer(palette = "Set1") + @@ -213,7 +216,7 @@ This plot shows how complex the samples are. We expect samples with more reads t metrics %>% ggplot(aes(x = total_reads, y = n_genes, - color = treatment)) + + color = sample_type)) + geom_point()+ scale_x_log10() + scale_color_brewer(palette = "Set1") + @@ -227,9 +230,9 @@ Here we are looking for consistency, and exonic mapping rates around 70% or 75% ```{r plot_exonic_mapping_rate} metrics %>% - ggplot(aes(x = cell, + ggplot(aes(x = sample_type, y = exonic_rate * 100, - color = treatment)) + + color = sample_type)) + geom_point() + ylab("Exonic rate %") + ggtitle("Exonic mapping rate") + @@ -247,9 +250,9 @@ Here, we expect a low intronic mapping rate (≤ 15% - 20%) ```{r plot_intronic_mapping_rate} metrics %>% - ggplot(aes(x = cell, + ggplot(aes(x = sample_type, y = intronic_rate * 100, - color = treatment)) + + color = sample_type)) + geom_point() + ylab("Intronic rate %") + ggtitle("Intronic mapping rate") + @@ -267,9 +270,9 @@ Here, we expect a low intergenic mapping rate, which is true for all samples. ```{r plot_intergenic_mapping_rate} metrics %>% - ggplot(aes(x = cell, + ggplot(aes(x = sample_type, y = intergenic_rate * 100, - color = treatment)) + + color = sample_type)) + geom_point() + ylab("Intergenic rate %") + ggtitle("Intergenic mapping rate") + @@ -286,9 +289,9 @@ Samples should have a ribosomal RNA (rRNA) "contamination" rate below 10% # for some bad samples it could be > 50% rrna_ylim <- max(round(metrics$r_rna_rate*100, 2)) + 10 metrics %>% - ggplot(aes(x = cell, + ggplot(aes(x = sample_type, y = r_rna_rate * 100, - color = treatment)) + + color = sample_type)) + geom_point() + ylab("rRNA rate, %")+ ylim(0, rrna_ylim) + @@ -303,9 +306,9 @@ There should be little bias, i.e. the values should be close to 1, or at least c ```{r plot_53_bias} metrics %>% - ggplot(aes(x = cell, + ggplot(aes(x = sample_type, y = x5_3_bias, - color = treatment)) + + color = sample_type)) + geom_point() + ggtitle("5'-3' bias") + coord_flip() + @@ -319,7 +322,7 @@ metrics %>% We expect consistency in the box plots here between the samples, i.e. the distribution of counts across the genes is similar ```{r plot_counts_per_gene} -metrics_small <- metrics %>% dplyr::select(sample, treatment) +metrics_small <- metrics %>% dplyr::select(sample, sample_type) metrics_small <- left_join(sample_names, metrics_small) counts <- @@ -330,9 +333,9 @@ counts <- counts <- left_join(counts, metrics, by = c("name" = "sample")) -ggplot(counts, aes(cell, +ggplot(counts, aes(sample_type, log2(counts+1), - fill = treatment)) + + fill = sample_type)) + geom_boxplot() + scale_fill_brewer(palette = "Set1") + ggtitle("Counts per gene, all non-zero genes") + @@ -355,7 +358,6 @@ raw_counts <- assays(se)[["raw"]] %>% filter(rowSums(.)!=0) %>% as.matrix() -colnames(raw_counts) <- str_replace(colnames(raw_counts), "^x", "") vst <- vst(raw_counts) #fix samples names @@ -363,9 +365,9 @@ coldat_for_pca <- as.data.frame(metrics) rownames(coldat_for_pca) <- coldat_for_pca$sample coldat_for_pca <- coldat_for_pca[colnames(raw_counts),] pca1 <- degPCA(vst, coldat_for_pca, - condition = "treatment", data = T)[["plot"]] + condition = "sample_type", data = T)[["plot"]] pca2 <- degPCA(vst, coldat_for_pca, - condition = "cell", data = T)[["plot"]] + condition = "sample_type", data = T, pc1="PC3", pc2="PC4")[["plot"]] pca1 + scale_color_brewer(palette = "Set1") pca2 + scale_color_brewer(palette = "Set1") @@ -377,13 +379,13 @@ variables=degCovariates(vst, coldat_for_pca) ``` -```{r clustering fig, fig.width = 10, fig.asp = .62, eval=FALSE} +```{r clustering fig, fig.width = 10, fig.asp = .62} ## Hierarchical clustering vst_cor <- cor(vst) -p <- pheatmap(vst_cor, annotation =coldat_for_pca[,c("cell", "treatment"), drop=F], show_rownames = T, show_colnames = T) +p <- pheatmap(vst_cor, annotation =coldat_for_pca[,c("sample_type"), drop=F], show_rownames = T, show_colnames = T) p ``` diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/run_markdown.R b/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/run_markdown.R new file mode 100644 index 0000000..e9e648f --- /dev/null +++ b/inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/run_markdown.R @@ -0,0 +1,9 @@ +library(rmarkdown) + +rmarkdown::render("./inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/QC.Rmd", + output_dir = "./inst/rmarkdown/templates/rnaseq/skeleton/reports/QC/", + clean = TRUE, + output_format = "html_document", + params = list( + params_file = '../../params_qc.R') + ) diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/results/placeholder b/inst/rmarkdown/templates/rnaseq/skeleton/results/placeholder new file mode 100644 index 0000000..e69de29 From 299e9996a69bd4c8a0a77c8c033318259b07ef3c Mon Sep 17 00:00:00 2001 From: Alex Bartlett Date: Tue, 23 Apr 2024 14:30:41 -0400 Subject: [PATCH 3/3] github paths? --- inst/rmarkdown/templates/rnaseq/skeleton/params_de.R | 4 ++-- inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R b/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R index c55baed..a04b1b3 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/params_de.R @@ -10,6 +10,6 @@ root = "../.." date = "YYYYMMDD" column = "sample_type" subset_column = NA -metadata_fn = "/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/coldata.csv" -counts_fn = '/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/tximport-counts.csv' +metadata_fn = "https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/bcbio/coldata.csv" +counts_fn = 'https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/bcbio/tximport-counts.csv' basedir <- 'results' diff --git a/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R b/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R index 894469a..505561d 100644 --- a/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R +++ b/inst/rmarkdown/templates/rnaseq/skeleton/params_qc.R @@ -6,5 +6,5 @@ aim = 'short description' analyst = 'person in the core' -metadata_fn="/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/coldata.csv" -se_object="/n/data1/cores/bcbio/platform/bcbioR-test-data/rnaseq/bcbio/bcbio-se.rds" +metadata_fn='https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/bcbio/coldata.csv' +se_object=url("https://raw.githubusercontent.com/bcbio/bcbioR-test-data/main/rnaseq/bcbio/bcbio-se.rds")