diff --git a/CHANGELOG.md b/CHANGELOG.md index dd113edc..5ce69e42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,20 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.2.0 - 2023-04-19 + +### `Added` + +- [[#97](https://github.com/nf-core/differentialabundance/issues/97)] - Allow for subsetting of samples for specific contrasts ([@pinin4fjords](https://github.com/pinin4fjords), reported by [@danhalligan-hx](https://github.com/danhalligan-hx), review by [@WackerO](https://github.com/WackerO)) +- [[#105](https://github.com/nf-core/differentialabundance/pull/105)] - Enabled multiple GMT/GMX files for GSEA ([@WackerO](https://github.com/WackerO), reported by [@grst](https://github.com/grst), review by [@pinin4fjords](https://github.com/pinin4fjords)) +- [[#108](https://github.com/nf-core/differentialabundance/issues/108)] - Add shiny app generation (starting feature set) ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO)) +- [[#110](https://github.com/nf-core/differentialabundance/pull/110)] - Add shiny app outputs to tower.yml ([@pinin4fjords](https://github.com/pinin4fjords), review by [@WackerO](https://github.com/WackerO), [@maxulysse](https://github.com/maxulysse)) + +### `Fixed` + +- [[#95](https://github.com/nf-core/differentialabundance/issues/95)] - Pipeline doesn't check for gene sets file specification when GSEA is activated ([@pinin4fjords](https://github.com/pinin4fjords), reported by [@danhalligan-hx](https://github.com/danhalligan-hx), review by [@FriederikeHanssen](https://github.com/FriederikeHanssen)) +- [[#93](https://github.com/nf-core/differentialabundance/issues/93)] - Shouldn't be re-using the single exploratory palette across multiple informative variables ([@pinin4fjords](https://github.com/pinin4fjords), review by [@matthdsm](https://github.com/matthdsm)) + ## 
v1.1.1 - 2023-03-02 ### `Fixed` diff --git a/README.md b/README.md index 5fb87362..2cb1013d 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,8 @@ On release, automated continuous integration tests run the pipeline on a full-si 3. Run differential analysis over all contrasts specified. 4. Optionally run a differential gene set analysis. 5. Generate exploratory and differential analysis plots for interpretation. -6. Build an HTML report based on R markdown, with interactive plots (where possible) and tables. +6. Optionally build and (if specified) deploy a Shiny app for fully interactive mining of results. +7. Build an HTML report based on R markdown, with interactive plots (where possible) and tables. ## Quick Start @@ -73,6 +74,26 @@ Affymetrix microarray: -profile affy, ``` +### Reporting + +The pipeline reports its outcomes in two forms. + +#### Markdown-derived HTML report + +![screenshot of the markdown report](docs/images/markdown_report.png "Markdown report") + +The primary workflow output is an HTML-format report produced from an [R markdown template](assets/differentialabundance_report.Rmd). This leverages helper functions from [shinyngs](https://github.com/pinin4fjords/shinyngs) to produce rich plots and tables, but does not provide significant interactivity. + +#### Shiny-based data mining app + +A second optional output is produced by leveraging [shinyngs](https://github.com/pinin4fjords/shinyngs) to build an interactive Shiny application. This allows more interaction with the data, setting of thresholds etc. + +![screenshot of the ShinyNGS contrast table](docs/images/shinyngs_contrast_table.png "ShinyNGS contrast table") + +![screenshot of the ShinyNGS gene plot](docs/images/shinyngs_gene_plot.png "ShinyNGS gene plot") + +By default the application is provided as an R script and associated serialised data structure, which you can use to quickly start the application locally. 
With proper configuration the app can also be deployed to [shinyapps.io](https://www.shinyapps.io/) - though this requires you to have an account on that service (free tier available). + ## Documentation The nf-core/differentialabundance pipeline comes with documentation about the pipeline [usage](https://nf-co.re/differentialabundance/usage), [parameters](https://nf-co.re/differentialabundance/parameters) and [output](https://nf-co.re/differentialabundance/output). diff --git a/assets/differentialabundance_report.Rmd b/assets/differentialabundance_report.Rmd index 1858490a..0da0b5a6 100644 --- a/assets/differentialabundance_report.Rmd +++ b/assets/differentialabundance_report.Rmd @@ -20,6 +20,10 @@ params: study_type: NULL study_name: NULL study_abundance_type: NULL + report_file: NULL + report_title: NULL + report_author: NULL + report_description: NULL observations_type: NULL observations: NULL # GSE156533.samplesheet.csv observations_id_col: NULL @@ -86,6 +90,7 @@ params: differential_max_pval: NULL differential_max_qval: NULL differential_palette_name: NULL + differential_subset_to_contrast_samples: NULL deseq2_test: NULL deseq2_fit_type: NULL deseq2_sf_type: NULL @@ -139,59 +144,6 @@ library(DT) datatable(NULL) ``` -```{r, echo=FALSE} - -# this function will be available via shinyngs in a release soon but we can use it here for now -anova_pca_metadata <- function(pca_coords, pcameta, fraction_explained){ - # Use 10 components or however many fewer is produced by the PCA - - last_pc <- 10 - if (ncol(pca_coords) < last_pc) { - last_pc <- ncol(pca_coords) - } - - # Remove non-useful variables (those with 1 value, or N values where N is the - # number of samples) - - pcameta <- pcameta[, chooseGroupingVariables(pcameta), drop = FALSE] - - # Run anova for all PCA against the selected meta vars - - run_anova <- function(meta_col, pc){ - fit <- aov(pca_coords[, pc] ~ factor(pcameta[, meta_col])) - smry <- summary(fit)[[1]] - - if ("Pr(>F)" %in% names(smry)) { - 
smry[["Pr(>F)"]][[1]] - }else{ - NA - } - } - - pvals <- outer( - 1:ncol(pcameta), - 1:last_pc, - Vectorize(run_anova) - ) - - # Name dimensions - - dimnames(pvals) <- list( - colnames(pcameta), - paste( - paste("PC", 1:last_pc, sep = ""), - " (", - fraction_explained[1:last_pc], - "%)", - sep = "" - ) - ) - - pvals -} -``` - - ```{r, include=FALSE} versions <- unlist(yaml.load_file(file.path(params$input_dir, params$versions_file)), recursive = FALSE) params_table <- data.frame(Parameter = names(unlist(params)), Value = unlist(params), row.names = NULL) @@ -215,11 +167,13 @@ make_params_table <- function(name, pattern = NULL, remove_pattern = FALSE){ print( htmltools::tagList(datatable(subparams, caption = paste("Parameters used for", name), rownames = FALSE, options = list(dom = dom)) )) } +report_title <- paste0('Differential ', params$features_type, ' abundance report', ifelse(is.null(params$report_title), '', paste0(': ', params$report_title))) +report_subtitle <- paste0(ifelse(is.null(params$report_author), '', paste0('By ', params$report_author, ', ')), 'differentialabundance workflow version ', versions[["Workflow.nf-core/differentialabundance"]]) ``` --- -title: "Differential `r params$features_type` abundance report" -subtitle: differentialabundance workflow version `r versions[["Workflow.nf-core/differentialabundance"]]` +title: "`r report_title`" +subtitle: `r report_subtitle` --- @@ -530,6 +484,7 @@ for (assay_type in rev(names(assay_data))){ observations[[iv]], levels = unique(observations[[iv]]) ) + pcaColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name) # Make plotting data combining PCA coords with coloring groups etc @@ -551,7 +506,7 @@ for (assay_type in rev(names(assay_data))){ ylab = labels[2], colorby = plotdata$colorby, plot_type = plot_types[[d]], - palette = groupColorScale, + palette = pcaColorScale, legend_title = prettifyVariablename(iv), labels = plotdata$name, show_labels = TRUE @@ 
-621,6 +576,8 @@ for (assay_type in rev(names(assay_data))){ cat(paste0("\n##### ", prettifyVariablename(assay_type), " (", iv, ")\n")) variable_genes <- selectVariableGenes(matrix = assay_data[[assay_type]], ntop = params$exploratory_n_features) + dendroColorScale <- makeColorScale(length(unique(observations[[iv]])), palette = params$exploratory_palette_name) + p <- clusteringDendrogram( 2^assay_data[[assay_type]][variable_genes, ], observations[, iv, drop = FALSE], @@ -633,7 +590,7 @@ for (assay_type in rev(names(assay_data))){ params$features_type, "s\n(", params$exploratory_clustering_method, " clustering, ", params$exploratory_cor_method, " correlation)"), cluster_method = params$exploratory_clustering_method, - palette = groupColorScale, + palette = dendroColorScale, labelspace = 0.25 ) # Defaults in shinyngs make the text in this plot a bit big for the report, so @@ -812,17 +769,22 @@ if (any(unlist(params[paste0(possible_gene_set_methods, '_run')]))){ if (unlist(params[paste0(gene_set_method, '_run')])){ cat("\n### ", toupper(gene_set_method) ," {.tabset}\n") - reference_gsea_tables <- paste0(contrasts$id, '.gsea_report_for_', contrasts$reference, '.tsv') - target_gsea_tables <- paste0(contrasts$id, '.gsea_report_for_', contrasts$target, '.tsv') + for (gmt_file in simpleSplit(params$gsea_gene_sets)) { + gmt_name <- basename(tools::file_path_sans_ext(gmt_file)) - for (i in 1:nrow(contrasts)){ - cat("\n#### ", contrast_descriptions[i], "\n") - - target_gsea_results <- read_metadata(target_gsea_tables[i])[,c(-2,-3)] - print( htmltools::tagList(datatable(target_gsea_results, caption = paste0("\nTarget (", contrasts$target[i], ")\n"), rownames = FALSE) )) - - ref_gsea_results <- read_metadata(reference_gsea_tables[i])[,c(-2,-3)] - print( htmltools::tagList(datatable(ref_gsea_results, caption = paste0("\nReference (", contrasts$reference[i], ")\n"), rownames = FALSE) )) + cat("\n#### ", gmt_name ," {.tabset}\n") + reference_gsea_tables <- paste0(contrasts$id, 
".", gmt_name, '.gsea_report_for_', contrasts$reference, '.tsv') + target_gsea_tables <- paste0(contrasts$id, ".", gmt_name, '.gsea_report_for_', contrasts$target, '.tsv') + + for (i in 1:nrow(contrasts)){ + cat("\n##### ", contrast_descriptions[i], "\n") + + target_gsea_results <- read_metadata(target_gsea_tables[i])[,c(-2,-3)] + print( htmltools::tagList(datatable(target_gsea_results, caption = paste0("\nTarget (", contrasts$target[i], ")\n"), rownames = FALSE) )) + + ref_gsea_results <- read_metadata(reference_gsea_tables[i])[,c(-2,-3)] + print( htmltools::tagList(datatable(ref_gsea_results, caption = paste0("\nReference (", contrasts$reference[i], ")\n"), rownames = FALSE) )) + } } } } diff --git a/conf/affy.config b/conf/affy.config index e11f2c49..1ed2da47 100644 --- a/conf/affy.config +++ b/conf/affy.config @@ -35,4 +35,8 @@ params { differential_qval_column = "adj.P.Val" differential_feature_id_column = "probe_id" differential_feature_name_column = "SYMBOL" + + // A small amount of upstream work is required to get the app building + // working for arrays + shinyngs_build_app = false } diff --git a/conf/modules.config b/conf/modules.config index 23ecb059..9f116820 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -141,10 +141,10 @@ process { "--vst_nsub $params.deseq2_vst_nsub", "--shrink_lfc $params.deseq2_shrink_lfc", "--cores $params.deseq2_cores", - "--contrast_variable \"$meta.variable\"", - "--reference_level \"$meta.reference\"", - "--treatment_level \"$meta.target\"", - "--blocking_variables \"$meta.blocking\"" + "--subset_to_contrast_samples $params.differential_subset_to_contrast_samples", + ((meta.blocking == null) ? '' : "--blocking_variables $meta.blocking"), + ((meta.exclude_samples_col == null) ? '' : "--exclude_samples_col $meta.exclude_samples_col"), + ((meta.exclude_samples_values == null) ? 
'' : "--exclude_samples_values $meta.exclude_samples_values") ].join(' ').trim() } } @@ -182,23 +182,23 @@ process { "--p.value ${params.limma_p_value}", "--lfc ${params.limma_lfc}", "--confint ${params.limma_confint}", - "--contrast_variable \"$meta.variable\"", - "--reference_level \"$meta.reference\"", - "--treatment_level \"$meta.target\"", - "--blocking_variables \"$meta.blocking\"" + "--subset_to_contrast_samples $params.differential_subset_to_contrast_samples", + ((meta.blocking == null) ? '' : "--blocking_variables $meta.blocking"), + ((meta.exclude_samples_col == null) ? '' : "--exclude_samples_col $meta.exclude_samples_col"), + ((meta.exclude_samples_values == null) ? '' : "--exclude_samples_values $meta.exclude_samples_values") ].join(' ').trim() } } withName: GSEA_GSEA { - ext.prefix = { "${meta.id}." } + ext.prefix = { "${meta.id}.${gene_sets.baseName}." } publishDir = [ [ - path: { "${params.outdir}/tables/gsea/${meta.id}" }, + path: { "${params.outdir}/tables/gsea/${meta.id}/${gene_sets.baseName}" }, mode: params.publish_dir_mode, pattern: '*gsea_report_for_*.tsv' ], [ - path: { "${params.outdir}/plots/gsea/${meta.id}" }, + path: { "${params.outdir}/plots/gsea/${meta.id}/${gene_sets.baseName}" }, mode: params.publish_dir_mode, pattern: '*.png' ] @@ -260,6 +260,33 @@ process { ].join(' ').trim() } } + withName: SHINYNGS_APP { + secret = (params.shinyngs_deploy_to_shinyapps_io) ? 
[ 'SHINYAPPS_TOKEN', 'SHINYAPPS_SECRET' ]: null + + publishDir = [ + path: { "${params.outdir}/shinyngs_app" }, + mode: params.publish_dir_mode, + ] + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + ext.args = { [ + "--assay_names \"${params.exploratory_assay_names}\"", + "--sample_id_col \"${params.observations_id_col}\"", + "--feature_id_col \"${params.features_id_col}\"", + "--diff_feature_id_col \"${params.differential_feature_id_column}\"", + "--fold_change_column \"${params.differential_fc_column}\"", + "--pval_column \"${params.differential_pval_column}\"", + "--qval_column \"${params.differential_qval_column}\"", + "--unlog_foldchanges \"${params.differential_foldchanges_logged}\"", + ((params.report_title == null) ? '' : "--title \"$params.report_title\""), + ((params.report_author == null) ? '' : "--author \"$params.report_author\""), + ((params.report_description == null) ? '' : "--description \"$params.report_description\""), + ((params.shinyngs_guess_unlog_matrices) ? "--guess_unlog_matrices" : ''), + ((params.shinyngs_deploy_to_shinyapps_io) ? "--deploy_app" : ''), + ((params.shinyngs_shinyapps_account == null) ? '' : "--shinyapps_account \"$params.shinyngs_shinyapps_account\""), + ((params.shinyngs_shinyapps_app_name == null) ? '' : "--shinyapps_name \"$params.shinyngs_shinyapps_app_name\"") + ].join(' ').trim() } + } + withName: CUSTOM_DUMPSOFTWAREVERSIONS { publishDir = [ path: { "${params.outdir}/pipeline_info" }, @@ -269,8 +296,8 @@ process { } withName: RMARKDOWNNOTEBOOK { - conda = "bioconda::r-shinyngs=1.5.5" - container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.5.5--r42hdfd78af_0':'quay.io/biocontainers/r-shinyngs:1.5.5--r42hdfd78af_0' }" } + conda = "bioconda::r-shinyngs=1.7.1" + container = { "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1':'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" } publishDir = [ path: { "${params.outdir}/report" }, mode: params.publish_dir_mode, diff --git a/conf/test_full.config b/conf/test_full.config index 6e14761e..cbd8e212 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -27,6 +27,11 @@ params { // Change palette exploratory_palette_name = 'Dark2' + // Set reporting parameters + report_title = "full tests" + report_author = "nf-core elves" + report_description = "This is a full-sized test dataset contributed by Oskar Wacker" + // Activate GSEA gsea_run = true gsea_gene_sets = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/gene_set_analysis/mh.all.v2022.1.Mm.symbols.gmt' diff --git a/docs/images/markdown_report.png b/docs/images/markdown_report.png new file mode 100644 index 00000000..d936db5b Binary files /dev/null and b/docs/images/markdown_report.png differ diff --git a/docs/images/shinyngs_contrast_table.png b/docs/images/shinyngs_contrast_table.png new file mode 100644 index 00000000..9e6400f9 Binary files /dev/null and b/docs/images/shinyngs_contrast_table.png differ diff --git a/docs/images/shinyngs_gene_plot.png b/docs/images/shinyngs_gene_plot.png new file mode 100644 index 00000000..1ba5c646 Binary files /dev/null and b/docs/images/shinyngs_gene_plot.png differ diff --git a/docs/images/workflow.png b/docs/images/workflow.png index 31741835..6fe5e03e 100644 Binary files a/docs/images/workflow.png and b/docs/images/workflow.png differ diff --git a/docs/images/workflow.svg b/docs/images/workflow.svg index 8eb19914..30f4a8fe 100644 --- a/docs/images/workflow.svg +++ b/docs/images/workflow.svg @@ -2,9 +2,9 @@ + + Affymetrix microarray HTML + + + + + + HTML + - Filter matrix R Markdown notebook + Build Shiny app Observation (sample) Observation (sample) annotations + id="tspan2026">annotations Contrast definitions + 
id="tspan2028">Contrast definitions Raw intensities - + id="tspan2030">Raw intensities Raw abundance matrix + id="tspan2032">Raw abundance matrix Feature annotations + id="tspan2034">Feature annotations + + + + + + + + + + diff --git a/docs/output.md b/docs/output.md index 4b6ed261..4bf07ea8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -66,6 +66,18 @@ Most plots are included in the HTML report (see above), but are also included in The `differential` folder is likely to be the core result set for most users, containing the main tables of differential statistics. +## Shiny app + +- `shinyngs_app/` + - `[study name]`: + - `data.rds`: serialized R object which can be used to generate a Shiny application + - `app.R`: minimal R script that will source the data object and generate the app + +The app must be run in an environment with [ShinyNGS](https://github.com/pinin4fjords/shinyngs) installed, or you can see the workflow parameters to deploy to shinyapps.io (see usage documentation). + +
+Output files + ### Pipeline information
diff --git a/docs/usage.md b/docs/usage.md index e381f88e..960c4cb6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -77,7 +77,11 @@ The necessary fields in order are: - `variable` - which column from the observations information will be used to define groups - `reference` - the base/ reference level for the comparison. If features have higher values in this group than target they will generate negative fold changes - `target` - the target/ non-reference level for the comparison. If features have higher values in this group than the reference they will generate positive fold changes + +You can optionally supply: + - `blocking` - semicolon-delimited, any additional variables (also observation columns) that should be modelled alongside the contrast variable +- `exclude_samples_col` and `exclude_samples_values` - the former being a valid column in the samples sheet, the latter a semicolon-delimited list of values in that column identifying the samples to remove prior to differential modelling. This is helpful where certain samples need to be excluded prior to analysis of a given contrast. The file can be tab or comma separated. @@ -91,7 +95,7 @@ The file can be tab or comma separated. This is usually the easiest way to supply annotations for RNA-seq features. It should match the GTF used in nf-core/rnaseq if that workflow was used to produce the input expression matrix. -### annotation package identifiers for Affymetrix arrays +### Annotation package identifiers for Affymetrix arrays For `-profile affy`, default behaviour is to derive an annotation table while running the affy/justrma module based on the CDF name discovered there. @@ -105,6 +109,72 @@ To override the above options, you may also supply your own features table as a By default, if you don't provide features, for non-array data the workflow will fall back to attempting to use the matrix itself as a source of feature annotations. 
For this to work you must make sure to set the `features_id_col`, `features_name_col` and `features_metadata_cols` parameters to the appropriate values, for example by setting them to 'gene_id' if that is the identifier column on the matrix. This will cause the gene ID to be used everywhere rather than more accessible gene symbols (as can be derived from the GTF), but the workflow should run. +## Shiny app generation + +The pipeline is capable of building, and even deploying (to [shinyapps.io](https://www.shinyapps.io/)) for you a Shiny app built with [ShinyNGS](https://github.com/pinin4fjords/shinyngs). + +This is enabled with: + +```bash +--shinyngs_build_app true +``` + +... which is the default. By default the app is not deployed, but just output to the output folder under `shinyngs_app/[study_name]`. + +You have 3 choices in running that application: + +1. Run locally +2. Have shinyapps.io host it for you +3. Host on a Shiny server + +### 1. Run locally + +You can start the application locally (in an environment where [ShinyNGS](https://github.com/pinin4fjords/shinyngs) is installed) like: + +```bash +cd [output directory]/shinyngs_app/[study_name] +Rscript app.R +``` + +This will give you a local URI to access in your browser: + +``` +Listening on http://127.0.0.1:3326 +``` + +### 2. Shinyapps.io deployment + +shinyapps.io is a hosting solution supplied by Posit (formerly RStudio) which gives you quick and easy access to hosting for Shiny applications. There is a free tier, though you'll have to pay for features such as authentication and improved resources. + +You can upload your app to shinyapps.io yourself, or deploy directly to shinyapps.io with this workflow, for which a few things need to happen: + +#### Account and app setup + +At https://www.shinyapps.io/, create an account, add a token (via Account -> Tokens) and note your secret and token. 
+ +You let Nextflow know about these via secrets: + +```bash +nextflow secrets set SHINYAPPS_TOKEN [token] +nextflow secrets set SHINYAPPS_SECRET [secret] +``` + +#### Configuration + +You then need to activate the deployment in your parameters, and supply both your account name and an app name: + +```bash +--shinyngs_deploy_to_shinyapps_io \ +--shinyngs_shinyapps_account '[account name]' \ +--shinyngs_shinyapps_app_name '[app name]' +``` + +With this configuration in place deployment should happen automatically every time you run your workflow. + +### 3. Run your own Shiny server + +There is also a [Shiny server application](https://posit.co/download/shiny-server/), which you can install on your own infrastructure and use to host applications yourself. + ## Running the pipeline The typical command for running the pipeline is as follows: diff --git a/modules.json b/modules.json index df5cfbb1..8079b287 100644 --- a/modules.json +++ b/modules.json @@ -17,27 +17,27 @@ }, "custom/dumpsoftwareversions": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "7101db4432d3268b7fcb5b8f75fa0a022dc5561b", "installed_by": ["modules"] }, "custom/matrixfilter": { "branch": "master", - "git_sha": "091496fc34f36df69ca037ed965af262384b5db4", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] }, "custom/tabulartogseacls": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] }, "custom/tabulartogseagct": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] }, "deseq2/differential": { "branch": "master", - "git_sha": "dbffa8da6e9f065a5b7240255c51cbd839f5df43", + "git_sha": "e05db1b791ba8202853f275438fcc67c563ca479", "installed_by": ["modules"] }, "gsea/gsea": { @@ -47,32 +47,38 @@ }, "gunzip": { 
"branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] }, "limma/differential": { "branch": "master", - "git_sha": "dbffa8da6e9f065a5b7240255c51cbd839f5df43", + "git_sha": "4805d97c29f1a3cdfc26a828796296e2d58076e1", "installed_by": ["modules"] }, "rmarkdownnotebook": { "branch": "master", - "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", "installed_by": ["modules"] }, + "shinyngs/app": { + "branch": "master", + "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", + "installed_by": ["modules"], + "patch": "modules/nf-core/shinyngs/app/shinyngs-app.diff" + }, "shinyngs/staticdifferential": { "branch": "master", - "git_sha": "c2f6d050ce2f4db1436a3616cd0dad0929e6ffcd", + "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", "installed_by": ["modules"] }, "shinyngs/staticexploratory": { "branch": "master", - "git_sha": "c2f6d050ce2f4db1436a3616cd0dad0929e6ffcd", + "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", "installed_by": ["modules"] }, "shinyngs/validatefomcomponents": { "branch": "master", - "git_sha": "c2f6d050ce2f4db1436a3616cd0dad0929e6ffcd", + "git_sha": "880d634b0d5aead9447ae29e3e02e0e31ca7ae7f", "installed_by": ["modules"] }, "untar": { diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf index 3df21765..800a6099 100644 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_single' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "bioconda::multiqc=1.13" + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.13--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py old mode 100755 new mode 100644 index e55b8d43..da033408 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -4,11 +4,10 @@ """Provide functions to merge multiple versions.yml files.""" +import yaml import platform from textwrap import dedent -import yaml - def _make_versions_html(versions): """Generate a tabular HTML output of all versions for MultiQC.""" diff --git a/modules/nf-core/custom/matrixfilter/meta.yml b/modules/nf-core/custom/matrixfilter/meta.yml index bb23fee7..337af6d6 100644 --- a/modules/nf-core/custom/matrixfilter/meta.yml +++ b/modules/nf-core/custom/matrixfilter/meta.yml @@ -7,6 +7,7 @@ keywords: tools: - "matrixfilter": description: "filter a matrix based on a minimum value and numbers of samples" + tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/matrixfilter/main.nf" input: - meta: diff --git a/modules/nf-core/custom/tabulartogseacls/meta.yml b/modules/nf-core/custom/tabulartogseacls/meta.yml index 06491345..20b0122b 100644 --- a/modules/nf-core/custom/tabulartogseacls/meta.yml +++ b/modules/nf-core/custom/tabulartogseacls/meta.yml @@ -6,8 +6,9 @@ keywords: - convert - tabular tools: - - "custom": + - custom: description: "Make a GSEA class file (.cls) from tabular inputs" + tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/tabulartogseacls/main.nf" input: - meta: diff --git 
a/modules/nf-core/custom/tabulartogseagct/meta.yml b/modules/nf-core/custom/tabulartogseagct/meta.yml index 8521f310..5de072e0 100644 --- a/modules/nf-core/custom/tabulartogseagct/meta.yml +++ b/modules/nf-core/custom/tabulartogseagct/meta.yml @@ -4,8 +4,9 @@ keywords: - gsea - gct tools: - - "tabulartogseagct": + - tabulartogseagct: description: "Convert a TSV or CSV with features by row and observations by column to a GCT format file as consumed by GSEA" + tool_dev_url: "https://github.com/nf-core/modules/blob/master/modules/nf-core/custom/tabulartogseagct/main.nf" input: - meta: diff --git a/modules/nf-core/deseq2/differential/main.nf b/modules/nf-core/deseq2/differential/main.nf index 303fbdbd..fe6d53d1 100644 --- a/modules/nf-core/deseq2/differential/main.nf +++ b/modules/nf-core/deseq2/differential/main.nf @@ -8,7 +8,8 @@ process DESEQ2_DIFFERENTIAL { 'quay.io/biocontainers/bioconductor-deseq2:1.34.0--r41hc247a5b_3' }" input: - tuple val(meta), path(samplesheet), path(counts) + tuple val(meta), val(contrast_variable), val(reference), val(target) + tuple val(meta2), path(samplesheet), path(counts) tuple val(control_genes_meta), path(control_genes_file) output: diff --git a/modules/nf-core/deseq2/differential/meta.yml b/modules/nf-core/deseq2/differential/meta.yml index 899ee71a..a32c3cb8 100644 --- a/modules/nf-core/deseq2/differential/meta.yml +++ b/modules/nf-core/deseq2/differential/meta.yml @@ -16,6 +16,33 @@ tools: licence: "['LGPL >=3']" input: + - meta: + type: map + description: | + Groovy Map containing contrast information. This can be used at the + workflow level to pass optional parameters to the module, e.g. + [ id:'contrast1', blocking:'patient' ] passed in as ext.args like: + '--blocking_variable $meta.blocking'. 
+ - contrast_variable: + type: string + description: | + The column in the sample sheet that should be used to define groups for + comparison + - reference: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the reference samples + - target: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the target samples + - meta2: + type: map + description: | + Groovy map containing study-wide metadata related to the sample sheet + and matrix - sample: type: file description: | @@ -23,14 +50,8 @@ input: - counts: type: file description: | - Raw TSV or CSV format expression matrix as output from the nf-core RNA-seq workflow - - meta: - type: map - description: | - Groovy Map containing contrast information, which can be referred to in - calls at the pipeline level e.g. [ variable:'treatment', reference:'treated', - control:'saline', blocking:'' ] passed in as ext.args like: '--reference_level - $meta.reference --treatment_level $meta.target' + Raw TSV or CSV format expression matrix as output from the nf-core + RNA-seq workflow - control_genes: type: file description: | diff --git a/modules/nf-core/deseq2/differential/templates/deseq_de.R b/modules/nf-core/deseq2/differential/templates/deseq_de.R index 7df4ab3d..f1fcd847 100755 --- a/modules/nf-core/deseq2/differential/templates/deseq_de.R +++ b/modules/nf-core/deseq2/differential/templates/deseq_de.R @@ -95,14 +95,17 @@ round_dataframe_columns <- function(df, columns = NULL, digits = 8){ opt <- list( count_file = '$counts', sample_file = '$samplesheet', - contrast_variable = NULL, - reference_level = NULL, - treatment_level = NULL, + contrast_variable = '$contrast_variable', + reference_level = '$reference', + target_level = '$target', blocking_variables = NULL, control_genes_file = '$control_genes_file', sizefactors_from_controls = FALSE, gene_id_col = "gene_id", 
sample_id_col = "experiment_accession", + subset_to_contrast_samples = FALSE, + exclude_samples_col = NULL, + exclude_samples_values = NULL, test = "Wald", fit_type = "parametric", sf_type = 'ratio', @@ -140,7 +143,7 @@ for ( ao in names(args_opt)){ # Check if required parameters have been provided -required_opts <- c('contrast_variable', 'reference_level', 'treatment_level') +required_opts <- c('contrast_variable', 'reference_level', 'target_level') missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] if (length(missing) > 0){ @@ -254,6 +257,33 @@ if (!contrast_variable %in% colnames(sample.sheet)) { } } +# Optionally, subset to only the samples involved in the contrast + +if (opt\$subset_to_contrast_samples){ + sample_selector <- sample.sheet[[contrast_variable]] %in% c(opt\$target_level, opt\$reference_level) + selected_samples <- sample.sheet[sample_selector, opt\$sample_id_col] + count.table <- count.table[, selected_samples] + sample.sheet <- sample.sheet[selected_samples, ] +} + +# Optionally, remove samples with specified values in a given field (probably +# don't use this as well as the above) + +if ((! is.null(opt\$exclude_samples_col)) && (! is.null(opt\$exclude_samples_values))){ + exclude_values = unlist(strsplit(opt\$exclude_samples_values, split = ';')) + + if (! opt\$exclude_samples_col %in% colnames(sample.sheet)){ + stop(paste(opt\$exclude_samples_col, ' specified to subset samples is not a valid sample sheet column')) + } + + print(paste0('Excluding samples with values of ', opt\$exclude_samples_values, ' in ', opt\$exclude_samples_col)) + sample_selector <- ! sample.sheet[[opt\$exclude_samples_col]] %in% exclude_values + + selected_samples <- sample.sheet[sample_selector, opt\$sample_id_col] + count.table <- count.table[, selected_samples] + sample.sheet <- sample.sheet[selected_samples, ] +} + # Now specify the model. 
Use cell-means style so we can be explicit with the # contrasts @@ -269,7 +299,8 @@ for (v in c(blocking.vars, contrast_variable)) { sample.sheet[[v]] <- as.factor(sample.sheet[[v]]) } -# Variable of interest goes last, see https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#multi-factor-designs +# Variable of interest goes last, see +# https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#multi-factor-designs model <- paste(model, contrast_variable, sep = ' + ') @@ -318,7 +349,7 @@ comp.results <- minmu = opt\$minmu, contrast = c( contrast_variable, - c(opt\$treatment_level, opt\$reference_level) + c(opt\$target_level, opt\$reference_level) ) ) @@ -327,7 +358,7 @@ if (opt\$shrink_lfc){ type = 'ashr', contrast = c( contrast_variable, - c(opt\$treatment_level, opt\$reference_level) + c(opt\$target_level, opt\$reference_level) ) ) } @@ -338,12 +369,12 @@ if (opt\$shrink_lfc){ ################################################ ################################################ -prefix_part_names <- c('contrast_variable', 'reference_level', 'treatment_level', 'blocking_variables') +prefix_part_names <- c('contrast_variable', 'reference_level', 'target_level', 'blocking_variables') prefix_parts <- unlist(lapply(prefix_part_names, function(x) gsub("[^[:alnum:]]", "_", opt[[x]]))) output_prefix <- paste(prefix_parts[prefix_parts != ''], collapse = '-') contrast.name <- - paste(opt\$treatment_level, opt\$reference_level, sep = "_vs_") + paste(opt\$target_level, opt\$reference_level, sep = "_vs_") cat("Saving results for ", contrast.name, " ...\n", sep = "") # Differential expression table- note very limited rounding for consistency of diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 4d2ebc84..2e0e4054 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -5,29 +5,29 @@ keywords: - compression tools: - gunzip: - description: | - gzip is a file 
format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] input: - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/limma/differential/main.nf b/modules/nf-core/limma/differential/main.nf index 216200ba..1f3f47f8 100644 --- a/modules/nf-core/limma/differential/main.nf +++ b/modules/nf-core/limma/differential/main.nf @@ -8,7 +8,8 @@ process LIMMA_DIFFERENTIAL { 'quay.io/biocontainers/bioconductor-limma:3.54.0--r42hc0cfd56_0' }" input: - tuple val(meta), path(samplesheet), path(intensities) + tuple val(meta), val(contrast_variable), val(reference), val(target) + tuple val(meta2), path(samplesheet), path(intensities) output: tuple val(meta), path("*.limma.results.tsv") , emit: results diff --git a/modules/nf-core/limma/differential/meta.yml b/modules/nf-core/limma/differential/meta.yml index 99f0bb0d..c3f5a64a 100644 --- a/modules/nf-core/limma/differential/meta.yml 
+++ b/modules/nf-core/limma/differential/meta.yml @@ -19,10 +19,30 @@ input: - meta: type: map description: | - Groovy Map containing contrast information, which can be referred to in - calls at the pipeline level e.g. [ variable:'treatment', reference:'treated', - control:'saline', blocking:'' ] passed in as ext.args like: '--reference_level - $meta.reference --treatment_level $meta.target' + Groovy Map containing contrast information. This can be used at the + workflow level to pass optional parameters to the module, e.g. + [ id:'contrast1', blocking:'patient' ] passed in as ext.args like: + '--blocking_variable $meta.blocking'. + - contrast_variable: + type: string + description: | + The column in the sample sheet that should be used to define groups for + comparison + - reference: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the reference samples + - target: + type: string + description: | + The value within the contrast_variable column of the sample sheet that + should be used to derive the target samples + - meta2: + type: map + description: | + Groovy map containing study-wide metadata related to the sample sheet + and matrix - samplesheeet: type: file description: | diff --git a/modules/nf-core/limma/differential/templates/limma_de.R b/modules/nf-core/limma/differential/templates/limma_de.R index 2c7bf3bb..47d0424f 100755 --- a/modules/nf-core/limma/differential/templates/limma_de.R +++ b/modules/nf-core/limma/differential/templates/limma_de.R @@ -67,12 +67,15 @@ read_delim_flexible <- function(file, header = TRUE, row.names = NULL, check.nam opt <- list( count_file = '$intensities', sample_file = '$samplesheet', - contrast_variable = NULL, - reference_level = NULL, - treatment_level = NULL, + contrast_variable = '$contrast_variable', + reference_level = '$reference', + target_level = '$target', blocking_variables = NULL, probe_id_col = "probe_id", sample_id_col = 
"experiment_accession", + subset_to_contrast_samples = FALSE, + exclude_samples_col = NULL, + exclude_samples_values = NULL, ndups = NULL, # lmFit spacing = NULL, # lmFit block = NULL, # lmFit @@ -108,7 +111,7 @@ for ( ao in names(args_opt)){ # Check if required parameters have been provided -required_opts <- c('contrast_variable', 'reference_level', 'treatment_level') +required_opts <- c('contrast_variable', 'reference_level', 'target_level') missing <- required_opts[unlist(lapply(opt[required_opts], is.null)) | ! required_opts %in% names(opt)] if (length(missing) > 0){ @@ -182,7 +185,7 @@ if (length(missing_samples) > 0) { )) } else{ # Save any non-count data, will gene metadata etc we might need later - noncount.table <- + nonintensities.table <- intensities.table[, !colnames(intensities.table) %in% rownames(sample.sheet), drop = FALSE] intensities.table <- intensities.table[, rownames(sample.sheet)] } @@ -207,7 +210,7 @@ if (!contrast_variable %in% colnames(sample.sheet)) { } else if (any(!c(opt\$reflevel, opt\$treatlevel) %in% sample.sheet[[contrast_variable]])) { stop( paste( - 'Please choose reference and treatment levels that are present in the', + 'Please choose reference and target levels that are present in the', contrast_variable, 'column of the sample sheet' ) @@ -224,6 +227,32 @@ if (!contrast_variable %in% colnames(sample.sheet)) { ) } } +# Optionally, subset to only the samples involved in the contrast + +if (opt\$subset_to_contrast_samples){ + sample_selector <- sample.sheet[[contrast_variable]] %in% c(opt\$target_level, opt\$reference_level) + selected_samples <- sample.sheet[sample_selector, opt\$sample_id_col] + intensities.table <- intensities.table[, selected_samples] + sample.sheet <- sample.sheet[selected_samples, ] +} + +# Optionally, remove samples with specified values in a given field (probably +# don't use this as well as the above) + +if ((! is.null(opt\$exclude_samples_col)) && (! 
is.null(opt\$exclude_samples_values))){ + exclude_values = unlist(strsplit(opt\$exclude_samples_values, split = ';')) + + if (! opt\$exclude_samples_col %in% colnames(sample.sheet)){ + stop(paste(opt\$exclude_samples_col, ' specified to subset samples is not a valid sample sheet column')) + } + + print(paste0('Excluding samples with values of ', opt\$exclude_samples_values, ' in ', opt\$exclude_samples_col)) + sample_selector <- ! sample.sheet[[opt\$exclude_samples_col]] %in% exclude_values + + selected_samples <- sample.sheet[sample_selector, opt\$sample_id_col] + intensities.table <- intensities.table[, selected_samples] + sample.sheet <- sample.sheet[selected_samples, ] +} # Now specify the model. Use cell-means style so we can be explicit with the # contrasts @@ -277,7 +306,7 @@ if (! is.null(opt\$correlation)){ fit <- do.call(lmFit, lmfit_args) # Contrasts bit -contrast <- paste(paste(contrast_variable, c(opt\$treatment_level, opt\$reference_level), sep='.'), collapse='-') +contrast <- paste(paste(contrast_variable, c(opt\$target_level, opt\$reference_level), sep='.'), collapse='-') contrast.matrix <- makeContrasts(contrasts=contrast, levels=design) fit2 <- contrasts.fit(fit, contrast.matrix) @@ -311,12 +340,12 @@ comp.results <- do.call(topTable, toptable_args)[rownames(intensities.table),] ################################################ ################################################ -prefix_part_names <- c('contrast_variable', 'reference_level', 'treatment_level', 'blocking_variables') +prefix_part_names <- c('contrast_variable', 'reference_level', 'target_level', 'blocking_variables') prefix_parts <- unlist(lapply(prefix_part_names, function(x) gsub("[^[:alnum:]]", "_", opt[[x]]))) output_prefix <- paste(prefix_parts[prefix_parts != ''], collapse = '-') contrast.name <- - paste(opt\$treatment_level, opt\$reference_level, sep = "_vs_") + paste(opt\$target_level, opt\$reference_level, sep = "_vs_") cat("Saving results for ", contrast.name, " ...\n", sep = 
"") # Differential expression table- note very limited rounding for consistency of diff --git a/modules/nf-core/rmarkdownnotebook/meta.yml b/modules/nf-core/rmarkdownnotebook/meta.yml index 8d0f9d28..08336169 100644 --- a/modules/nf-core/rmarkdownnotebook/meta.yml +++ b/modules/nf-core/rmarkdownnotebook/meta.yml @@ -10,7 +10,7 @@ tools: homepage: https://rmarkdown.rstudio.com/ documentation: https://rmarkdown.rstudio.com/lesson-1.html tool_dev_url: https://github.com/rstudio/rmarkdown - doi: "" + licence: GPL-3 params: diff --git a/modules/nf-core/shinyngs/app/main.nf b/modules/nf-core/shinyngs/app/main.nf new file mode 100644 index 00000000..b5c51234 --- /dev/null +++ b/modules/nf-core/shinyngs/app/main.nf @@ -0,0 +1,59 @@ + +process SHINYNGS_APP { + tag "$meta.id" + label 'process_single' + + // To be able to pass the necessary secrets for shinyapps.io deployment, + // this process must be configured by placing a statement like the + // following in the nextflow.config: + // + // withName: SHINYNGS_APP { + // secret = [ 'SHINYAPPS_TOKEN', 'SHINYAPPS_SECRET' ] + // } + // + // Those values must then be set in your Nextflow secrets. + + conda "bioconda::r-shinyngs=1.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1': + 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" + + input: + tuple val(meta), path(sample), path(feature_meta), path(assay_files) // Experiment-level info + tuple val(meta2), path(contrasts), path(differential_results) // Differential info: contrasts and differential stats + val(contrast_stats_assay) + + output: + tuple val(meta), path("*/data.rds"), path("*/app.R") , emit: app + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // For full list of available args see + // https://github.com/pinin4fjords/shinyngs/blob/develop/exec/make_app_from_files.R + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: meta.id + + """ + cp $feature_meta fixed_$feature_meta + sed -i.bak s/${params.features_name_col}/gene_name/ fixed_$feature_meta + + make_app_from_files.R \\ + --sample_metadata $sample \\ + --feature_metadata fixed_$feature_meta \\ + --assay_files ${assay_files.join(',')} \\ + --contrast_file $contrasts \\ + --contrast_stats_assay $contrast_stats_assay \\ + --differential_results ${differential_results.join(',')} \\ + --output_dir $prefix \\ + $args \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + r-shinyngs: \$(Rscript -e "library(shinyngs); cat(as.character(packageVersion('shinyngs')))") + END_VERSIONS + """ +} diff --git a/modules/nf-core/shinyngs/app/meta.yml b/modules/nf-core/shinyngs/app/meta.yml new file mode 100644 index 00000000..a695351d --- /dev/null +++ b/modules/nf-core/shinyngs/app/meta.yml @@ -0,0 +1,72 @@ +name: "shinyngs_app" +description: build and deploy Shiny apps for interactively mining differential abundance data +keywords: + - differential + - expression + - rna-seq + - deseq2 + +tools: + - "shinyngs": + description: "Provides Shiny applications for various array and NGS applications. 
Currently very RNA-seq centric, with plans for expansion." + homepage: "https://github.com/pinin4fjords/shinyngs" + documentation: "https://rawgit.com/pinin4fjords/shinyngs/master/vignettes/shinyngs.html" + tool_dev_url: "https://github.com/pinin4fjords/shinyngs" + licence: "['AGPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing information on experiment, at a minimum an id. + e.g. [ id:'test' ] + - meta2: + type: map + description: | + Groovy Map containing information on experiment, at a minimum an id. To match meta. + e.g. [ id:'test' ] + - sample: + type: file + description: | + CSV-format sample sheet with sample metadata + - feature_meta: + type: file + description: | + TSV-format feature (e.g. gene) metadata + - assay_files: + type: list + description: | + List of TSV-format matrix files representing different measures for the same samples (e.g. raw and normalised). + - contrasts: + type: file + description: | + CSV-format file with four columns identifying the sample sheet variable, reference level, treatment level, and optionally a comma-separated list of covariates used as blocking factors. + - differential_results: + type: list + description: | + List of TSV-format differential analysis outputs, one per row of the contrasts file + +output: + - meta: + type: map + description: | + Groovy Map containing information on experiment. + e.g. [ id:'test' ] + - data: + type: file + description: | + A shinyngs ExploratorySummarizedExperiment + object serialized with saveRDS(). + pattern: "app/data.rds" + - app: + type: file + description: | + The mini R script required to build an application from data.rds.
+ pattern: "app/app.R" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@pinin4fjords" diff --git a/modules/nf-core/shinyngs/app/shinyngs-app.diff b/modules/nf-core/shinyngs/app/shinyngs-app.diff new file mode 100644 index 00000000..bf8a864a --- /dev/null +++ b/modules/nf-core/shinyngs/app/shinyngs-app.diff @@ -0,0 +1,19 @@ +Changes in module 'nf-core/shinyngs/app' +--- modules/nf-core/shinyngs/app/main.nf ++++ modules/nf-core/shinyngs/app/main.nf +@@ -37,9 +37,12 @@ + def prefix = task.ext.prefix ?: meta.id + + """ ++ cp $feature_meta fixed_$feature_meta ++ sed -i.bak s/${params.features_name_col}/gene_name/ fixed_$feature_meta ++ + make_app_from_files.R \\ + --sample_metadata $sample \\ +- --feature_metadata $feature_meta \\ ++ --feature_metadata fixed_$feature_meta \\ + --assay_files ${assay_files.join(',')} \\ + --contrast_file $contrasts \\ + --contrast_stats_assay $contrast_stats_assay \\ + +************************************************************ diff --git a/modules/nf-core/shinyngs/staticdifferential/main.nf b/modules/nf-core/shinyngs/staticdifferential/main.nf index b9bfcf10..bef46399 100644 --- a/modules/nf-core/shinyngs/staticdifferential/main.nf +++ b/modules/nf-core/shinyngs/staticdifferential/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_STATICDIFFERENTIAL { tag "$meta.id" label 'process_single' - conda "bioconda::r-shinyngs=1.5.6" + conda "bioconda::r-shinyngs=1.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.5.6--r42hdfd78af_0': - 'quay.io/biocontainers/r-shinyngs:1.5.6--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1': + 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" input: tuple val(meta), path(differential_result) // Differential info: contrast and differential stats diff --git a/modules/nf-core/shinyngs/staticexploratory/main.nf b/modules/nf-core/shinyngs/staticexploratory/main.nf index b81a5e07..c1572087 100644 --- a/modules/nf-core/shinyngs/staticexploratory/main.nf +++ b/modules/nf-core/shinyngs/staticexploratory/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_STATICEXPLORATORY { tag "$meta.id" label 'process_single' - conda "bioconda::r-shinyngs=1.5.6" + conda "bioconda::r-shinyngs=1.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.5.6--r42hdfd78af_0': - 'quay.io/biocontainers/r-shinyngs:1.5.6--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1': + 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" input: tuple val(meta), path(sample), path(feature_meta), path(assay_files) diff --git a/modules/nf-core/shinyngs/validatefomcomponents/main.nf b/modules/nf-core/shinyngs/validatefomcomponents/main.nf index b845f288..7a488b2e 100644 --- a/modules/nf-core/shinyngs/validatefomcomponents/main.nf +++ b/modules/nf-core/shinyngs/validatefomcomponents/main.nf @@ -2,10 +2,10 @@ process SHINYNGS_VALIDATEFOMCOMPONENTS { tag "$sample" label 'process_single' - conda "bioconda::r-shinyngs=1.5.6" + conda "bioconda::r-shinyngs=1.7.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.5.6--r42hdfd78af_0': - 'quay.io/biocontainers/r-shinyngs:1.5.6--r42hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/r-shinyngs:1.7.1--r42hdfd78af_1': + 'quay.io/biocontainers/r-shinyngs:1.7.1--r42hdfd78af_1' }" input: tuple val(meta), path(sample), path(assay_files) diff --git a/nextflow.config b/nextflow.config index 8afb243a..59098513 100644 --- a/nextflow.config +++ b/nextflow.config @@ -18,10 +18,15 @@ params { matrix = null control_features = null sizefactors_from_controls = null - report_file = "${projectDir}/assets/differentialabundance_report.Rmd" + + // Reporting logo_file = "${projectDir}/docs/images/nf-core-differentialabundance_logo_light.png" css_file = "${projectDir}/assets/nf-core_style.css" citations_file = "${projectDir}/CITATIONS.md" + report_file = "${projectDir}/assets/differentialabundance_report.Rmd" + report_title = null + report_author = null + report_description = null // Sample sheet options observations_type = 'sample' @@ -65,18 +70,19 @@ params { exploratory_palette_name = 'Set1' // Differential options - differential_file_suffix = ".deseq2.results.tsv" - differential_feature_id_column = "gene_id" - differential_feature_name_column = "gene_name" - differential_fc_column = "log2FoldChange" - differential_pval_column = "pvalue" - differential_qval_column = "padj" - differential_min_fold_change = 2 - differential_max_pval = 1 - differential_max_qval = 0.05 - differential_foldchanges_logged = true - differential_palette_name = 'Set1' - + differential_file_suffix = ".deseq2.results.tsv" + differential_feature_id_column = "gene_id" + differential_feature_name_column = "gene_name" + differential_fc_column = "log2FoldChange" + differential_pval_column = "pvalue" + differential_qval_column = "padj" + differential_min_fold_change = 2 + differential_max_pval = 1 + differential_max_qval = 0.05 + differential_foldchanges_logged = true + differential_palette_name = 'Set1' + 
differential_subset_to_contrast_samples = false + // DESeq2-specific options deseq2_test = "Wald" deseq2_fit_type = "parametric" @@ -135,6 +141,17 @@ params { gsea_gene_sets = null + // ShinyNGS + shinyngs_build_app = true + shinyngs_guess_unlog_matrices = true + + // Note: for shinyapps deployment, in addition to setting these values, + // SHINYAPPS_TOKEN and SHINYAPPS_SECRET must be available to the + // environment, probably via Nextflow secrets + shinyngs_deploy_to_shinyapps_io = false + shinyngs_shinyapps_account = null + shinyngs_shinyapps_app_name = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes' @@ -313,7 +330,7 @@ manifest { description = 'Differential abundance analysis' mainScript = 'main.nf' nextflowVersion = '!>=22.10.1' - version = '1.1.1' + version = '1.2.0' doi = '10.5281/zenodo.7568000' } diff --git a/nextflow_schema.json b/nextflow_schema.json index afaa6b42..d489a034 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -410,6 +410,11 @@ "help_text": "Check the content of `RColorBrewer::brewer.pal.info` from an R terminal for valid palette names.", "description": "Valid R palette name", "fa_icon": "fas fa-palette" + }, + "differential_subset_to_contrast_samples": { + "type": "boolean", + "fa_icon": "far fa-object-group", + "description": "In differential analysis (DESeq2 or Limma), subset to the contrast samples before modelling variance?"
} }, "required": [ @@ -778,12 +783,51 @@ "gsea_gene_sets": { "type": "string", "default": "None", - "description": "Gene sets in GMT or GMX-format", + "description": "Gene sets in GMT or GMX-format (multiple comma-separated input files are possible)", "fa_icon": "fas fa-bars" } }, "fa_icon": "fas fa-layer-group" }, + "shiny_app_settings": { + "title": "Shiny app settings", + "type": "object", + "description": "", + "default": "", + "properties": { + "shinyngs_build_app": { + "type": "boolean", + "default": true, + "description": "Should a Shiny app be built?", + "help_text": "At a minimum this will trigger generation of files you can quickly use to spin up a shiny app locally. But you can also use the 'shinyapps' settings to deploy an app straight to shinyapps.io.", + "fa_icon": "fas fa-wrench" + }, + "shinyngs_deploy_to_shinyapps_io": { + "type": "boolean", + "description": "Should the app be deployed to shinyapps.io?", + "fa_icon": "fas fa-file-import" + }, + "shinyngs_shinyapps_account": { + "type": "string", + "default": "None", + "description": "Your shinyapps.io account name", + "fa_icon": "fas fa-user" + }, + "shinyngs_shinyapps_app_name": { + "type": "string", + "default": "None", + "description": "The name of the app to push to in your shinyapps.io account", + "fa_icon": "fas fa-file-signature" + }, + "shinyngs_guess_unlog_matrices": { + "type": "boolean", + "default": true, + "description": "Should we guess the log status of matrices and unlog for the app?", + "help_text": "In the app context, it's usually helpful if things are not in log scale, so that e.g. fold changes make some sense with respect to observed values. This flag will cause the shinyngs app-building script to make a guess based on observed values as to the log status of input matrices, and adjust the loading accordingly." 
+ } + }, + "fa_icon": "fab fa-app-store-ios" + }, "reporting_options": { "title": "Reporting options", "type": "object", @@ -821,6 +865,24 @@ "default": "CITATIONS.md", "description": "A markdown file containing citations to include in the fiinal report", "fa_icon": "fas fa-ad" + }, + "report_title": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-heading", + "description": "A title for reporting outputs" + }, + "report_author": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-user-edit", + "description": "An author for reporting outputs" + }, + "report_description": { + "type": "string", + "default": "None", + "fa_icon": "fas fa-feather", + "description": "A description for reporting outputs" } }, "required": ["report_file", "logo_file", "css_file"] @@ -1051,6 +1113,9 @@ { "$ref": "#/definitions/gsea" }, + { + "$ref": "#/definitions/shiny_app_settings" + }, { "$ref": "#/definitions/reporting_options" }, diff --git a/tower.yml b/tower.yml index d7d4bf43..098d132c 100644 --- a/tower.yml +++ b/tower.yml @@ -17,3 +17,7 @@ reports: display: "DESeq2 processed counts table" "**/plots/qc/*.dispersion.png": display: "DESeq2 dispersion plot" + "**/shinyngs_app/data.rds": + display: "Data for Shiny app" + "**/shinyngs_app/app.R": + display: "Launch script for Shiny app" diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 80096b56..f71e45b0 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -19,8 +19,8 @@ if (params.input) { ch_input = Channel.of([ exp_meta, params.input ]) } else { e if (params.study_type == 'affy_array'){ if (params.affy_cel_files_archive) { ch_celfiles = Channel.of([ exp_meta, file(params.affy_cel_files_archive, checkIfExists: true) ]) - } else { - exit 1, 'CEL files archive not specified!' 
+ } else { + error("CEL files archive not specified!") } } else{ @@ -30,13 +30,20 @@ if (params.study_type == 'affy_array'){ matrix_file = file(params.matrix, checkIfExists: true) ch_in_raw = Channel.of([ exp_meta, matrix_file]) } else { - exit 1, 'Input matrix not specified!' + error("Input matrix not specified!") } } // Check optional parameters if (params.control_features) { ch_control_features = file(params.control_features, checkIfExists: true) } else { ch_control_features = [[],[]] } -if (params.gsea_run) { gene_sets_file = file(params.gsea_gene_sets, checkIfExists: true) } else { gene_sets_file = [] } +if (params.gsea_run) { + if (params.gsea_gene_sets){ + gene_sets_files = params.gsea_gene_sets.split(",") + ch_gene_sets = Channel.of(gene_sets_files).map { file(it, checkIfExists: true) } + } else { + error("GSEA activated but gene set file not specified!") + } +} report_file = file(params.report_file, checkIfExists: true) logo_file = file(params.logo_file, checkIfExists: true) @@ -70,6 +77,7 @@ include { TABULAR_TO_GSEA_CHIP } from '../modules/local/tabular_to_gsea_chip' include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' include { UNTAR } from '../modules/nf-core/untar/main.nf' include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { SHINYNGS_APP } from '../modules/nf-core/shinyngs/app/main' include { SHINYNGS_STATICEXPLORATORY as PLOT_EXPLORATORY } from '../modules/nf-core/shinyngs/staticexploratory/main' include { SHINYNGS_STATICDIFFERENTIAL as PLOT_DIFFERENTIAL } from '../modules/nf-core/shinyngs/staticdifferential/main' include { SHINYNGS_VALIDATEFOMCOMPONENTS as VALIDATOR } from '../modules/nf-core/shinyngs/validatefomcomponents/main' @@ -223,7 +231,7 @@ workflow DIFFERENTIALABUNDANCE { if (!it.id){ it.id = it.values().join('_') } - it + tuple(it, it.variable, it.reference, it.target) } // Firstly Filter the input matrix @@ -235,16 +243,15 @@ workflow DIFFERENTIALABUNDANCE { // 
Prepare inputs for differential processes - ch_differential_inputs = ch_contrasts.combine( - VALIDATOR.out.sample_meta - .join(CUSTOM_MATRIXFILTER.out.filtered) // -> meta, samplesheet, filtered matrix - .map{ it.tail() } - ) + ch_samples_and_matrix = VALIDATOR.out.sample_meta + .join(CUSTOM_MATRIXFILTER.out.filtered) // -> meta, samplesheet, filtered matrix + .first() if (params.study_type == 'affy_array'){ LIMMA_DIFFERENTIAL ( - ch_differential_inputs + ch_contrasts, + ch_samples_and_matrix ) ch_differential = LIMMA_DIFFERENTIAL.out.results @@ -261,7 +268,8 @@ workflow DIFFERENTIALABUNDANCE { // annotations DESEQ2_DIFFERENTIAL ( - ch_differential_inputs, + ch_contrasts, + ch_samples_and_matrix, ch_control_features ) @@ -291,8 +299,6 @@ workflow DIFFERENTIALABUNDANCE { // changes/ p values from DESeq2 if (params.gsea_run){ - - ch_gene_sets = Channel.from(gene_sets_file) // For GSEA, we need to convert normalised counts to a GCT format for // input, and process the sample sheet to generate class definitions @@ -300,16 +306,23 @@ workflow DIFFERENTIALABUNDANCE { CUSTOM_TABULARTOGSEAGCT ( ch_norm ) - ch_contrasts_and_samples = ch_contrasts.combine( VALIDATOR.out.sample_meta.map { it[1] } ) + // TODO: update CUSTOM_TABULARTOGSEACLS for value channel input per new + // guidelines (rather than meta usage employed here) + + ch_contrasts_and_samples = ch_contrasts + .map{it[0]} // revert back to contrasts meta map + .combine( VALIDATOR.out.sample_meta.map { it[1] } ) + CUSTOM_TABULARTOGSEACLS(ch_contrasts_and_samples) TABULAR_TO_GSEA_CHIP( VALIDATOR.out.feature_meta.map{ it[1] }, [params.features_id_col, params.features_name_col] ) - + // The normalised matrix does not always have a contrast meta, so we // need a combine rather than a join here + // Also add file name to metamap for easy access from modules.config ch_gsea_inputs = CUSTOM_TABULARTOGSEAGCT.out.gct .map{ it.tail() } @@ -342,7 +355,7 @@ workflow DIFFERENTIALABUNDANCE { ch_contrast_variables = ch_contrasts
.map{ - [ "id": it.variable ] + [ "id": it[1] ] } .unique() @@ -410,6 +423,31 @@ workflow DIFFERENTIALABUNDANCE { ) } + if (params.shinyngs_build_app){ + + // Make (and optionally deploy) the shinyngs app + + // Make a new contrasts file from the differential metas to guarantee the + // same order as the differential results + + ch_app_differential = ch_differential.first().map{it[0].keySet().join(',')} + .concat( + ch_differential.map{it[0].values().join(',')} + ) + .collectFile(name: 'contrasts.csv', newLine: true, sort: false) + .map{ + tuple(exp_meta, it) + } + .combine(ch_differential.map{it[1]}.collect().map{[it]}) + + SHINYNGS_APP( + ch_all_matrices, // meta, samples, features, [ matrices ] + ch_app_differential, // meta, contrasts, [differential results] + params.exploratory_assay_names.split(',').findIndexOf { it == params.exploratory_final_assay } + 1 + ) + ch_versions = ch_versions.mix(SHINYNGS_APP.out.versions) + } + // Make a params list - starting with the input matrices and the relevant // params to use in reporting @@ -419,9 +457,9 @@ workflow DIFFERENTIALABUNDANCE { // Condition params reported on study type - def params_pattern = ~/^(study|observations|features|filtering|exploratory|differential|deseq2|gsea).*/ + def params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|deseq2|gsea).*/ if (params.study_type == 'affy_array'){ - params_pattern = ~/^(study|observations|features|filtering|exploratory|differential|affy|limma|gsea).*/ + params_pattern = ~/^(report|study|observations|features|filtering|exploratory|differential|affy|limma|gsea).*/ } ch_report_params = ch_report_input_files