From d45c4aa0f268ee9178f8634689e21868063f13aa Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Fri, 26 Jul 2024 15:57:01 +0200 Subject: [PATCH 1/6] Allow caching for -resume --- workflows/differentialabundance.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 020c0747..a7aae2bd 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -262,7 +262,7 @@ workflow DIFFERENTIALABUNDANCE { } ch_features = ch_features_matrix .map{ meta, matrix -> - matrix.copyTo(matrix_as_anno_filename) + matrix.mklink(options = ['overwrite':true], matrix_as_anno_filename) [ meta, file(matrix_as_anno_filename) ] } } From 20c6cc6f849f35a683e2c693387fa4db538eef6d Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Fri, 26 Jul 2024 16:26:13 +0200 Subject: [PATCH 2/6] Use recommended format for optional parameters --- workflows/differentialabundance.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index a7aae2bd..3145dc89 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -262,7 +262,7 @@ workflow DIFFERENTIALABUNDANCE { } ch_features = ch_features_matrix .map{ meta, matrix -> - matrix.mklink(options = ['overwrite':true], matrix_as_anno_filename) + matrix.mklink(matrix_as_anno_filename, overwrite:true) [ meta, file(matrix_as_anno_filename) ] } } From 5a3727b465603cd87560562ce0e9d76d08ef92c1 Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Fri, 26 Jul 2024 17:47:12 +0200 Subject: [PATCH 3/6] Skip copying matrix if not necessary to allow resuming pipeline execution --- workflows/differentialabundance.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 3145dc89..c1f9b5b9 100644 --- a/workflows/differentialabundance.nf +++ 
b/workflows/differentialabundance.nf @@ -262,7 +262,8 @@ workflow DIFFERENTIALABUNDANCE { } ch_features = ch_features_matrix .map{ meta, matrix -> - matrix.mklink(matrix_as_anno_filename, overwrite:true) + matrix_copy = file(matrix_as_anno_filename) + matrix_copy.exists() && matrix.getText().md5().equals(matrix_copy.getText().md5()) ?: matrix.copyTo(matrix_as_anno_filename) [ meta, file(matrix_as_anno_filename) ] } } From 0a3927acbe4743c893529de9a983949febd03856 Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Tue, 30 Jul 2024 12:01:02 +0200 Subject: [PATCH 4/6] Keep original matrix file name in the annotation copy to improve resumability --- workflows/differentialabundance.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index c1f9b5b9..98e2772a 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -254,7 +254,7 @@ workflow DIFFERENTIALABUNDANCE { // Otherwise we can just use the matrix input; save it to the workdir so that it does not // just appear wherever the user runs the pipeline - matrix_as_anno_filename = "${workflow.workDir}/matrix_as_anno.${matrix_file.getExtension()}" + matrix_as_anno_filename = "${workflow.workDir}/${matrix_file.getBaseName()}_as_anno.${matrix_file.getExtension()}" if (params.study_type == 'maxquant'){ ch_features_matrix = ch_in_norm } else { From 05cfb3a0ed80420151b0770cc08885d14ea72e54 Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Tue, 27 Aug 2024 13:38:03 +0200 Subject: [PATCH 5/6] Move main_coda logic into differentialabundance as experimental study type --- main_coda.nf | 40 -------------- nextflow.config | 4 ++ nextflow_coda.config | 55 ------------------- nextflow_schema.json | 21 ++++++- nextflow_schema_coda.json | 25 --------- subworkflows/local/correlation/main.nf | 20 +++---- subworkflows/local/differential/main.nf | 8 +-- subworkflows/local/enrichment/main.nf | 9 +-- 
subworkflows/local/experimental/main.nf | 15 ++--- subworkflows/local/variable_selection/main.nf | 16 ++---- workflows/differentialabundance.nf | 41 ++++++++++++-- 11 files changed, 82 insertions(+), 172 deletions(-) delete mode 100644 main_coda.nf delete mode 100644 nextflow_coda.config delete mode 100644 nextflow_schema_coda.json diff --git a/main_coda.nf b/main_coda.nf deleted file mode 100644 index c35682f5..00000000 --- a/main_coda.nf +++ /dev/null @@ -1,40 +0,0 @@ -// include { PROPR_PROPR } from '../../../modules/nf-core/propr/propr/main' -// include { PROPR_PROPD } from '../../../modules/nf-core/propr/propd/main' -// include { PROPR_GREA } from '../../../modules/nf-core/propr/grea/main' -// include { MYGENE } from '../../../modules/nf-core/mygene/main' -include { EXPERIMENTAL } from './subworkflows/local/experimental/main.nf' -include { fromSamplesheet } from 'plugin/nf-validation' - - -// These are local files from my Bachelor Thesis project, I am creating the ch_samples_and_matrix -// manually for testing but it should be be provided by the processing section of nf-core/differentialabundance -Counts_ch = Channel.fromPath(params.matrix) - -Sample_ch = Channel.fromPath(params.input) - .map{ it -> [[id: 'YMC'], it]} - -ch_samples_and_matrix = Sample_ch.combine(Counts_ch) - -// Convert the samplesheet.csv in a channel with the proper format -ch_tools = Channel.fromSamplesheet('tools') - - -// TO DO: This should be modified to run one path per default, not all -if (params.pathway == "all") { - ch_tools - .set{ ch_tools_single } -} else { - ch_tools - .filter{ - it[0]["pathway_name"] == params.pathway // TO DO: change pathway to path also in the tools_samplesheet file - } - //.view() - .set{ ch_tools_single } -} -ch_tools_single.view() - -workflow { - EXPERIMENTAL(ch_samples_and_matrix, ch_tools_single) - EXPERIMENTAL.out.output.view() -} - diff --git a/nextflow.config b/nextflow.config index 8ddd6b3e..967ab185 100644 --- a/nextflow.config +++ b/nextflow.config 
@@ -21,6 +21,10 @@ params { control_features = null sizefactors_from_controls = false + // Experimental analysis options + tools = null + pathway = null + // Reporting logo_file = "$projectDir/docs/images/nf-core-differentialabundance_logo_light.png" css_file = "$projectDir/assets/nf-core_style.css" diff --git a/nextflow_coda.config b/nextflow_coda.config deleted file mode 100644 index 23d475f4..00000000 --- a/nextflow_coda.config +++ /dev/null @@ -1,55 +0,0 @@ -//parametros generales, para el usuario (que se pueden arrancar poniendo los flags como el command line) - -params.tools = "./assets/tools_samplesheet.csv" -params.outdir = "../results_pipeline" -params.validationSkipDuplicateCheck = true -params.publish_dir_mode = 'copy' -params.pathway = 'all' -//params.maxRetries = 0 -includeConfig 'conf/modules.config' // now it should refer to modules_coda.config - - - -profiles { - debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - process.debug = true - cleanup = false - nextflow.enable.configProcessNamesValidation = true - } - crg { includeConfig "conf/crg.config" } -} - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! 
Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/nextflow_schema.json b/nextflow_schema.json index 54fc1b19..f30bcaaa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -24,7 +24,7 @@ "default": "rnaseq", "description": "A string identifying the technology used to produce the data", "help_text": "Currently 'rnaseq' or 'affy_array' may be specified.", - "enum": ["rnaseq", "affy_array", "maxquant", "geo_soft_file"], + "enum": ["rnaseq", "affy_array", "maxquant", "geo_soft_file", "experimental"], "fa_icon": "far fa-keyboard" }, "input": { @@ -58,7 +58,15 @@ "default": "counts", "description": "Type of abundance measure used, platform-dependent", "fa_icon": "fas fa-keyboard" - }, + } + } + }, + "experimental_analysis_options": { + "title": "Experimental analysis, including CoDa", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Values required for experimental analysis", + "properties": { "tools": { "type": "string", "format": "file-path", @@ -68,6 +76,12 @@ "pattern": "^\\S+\\.(csv|tsv|yaml)$", "description": "Path to comma-separated file containing samplesheet", "help_text": "This file defines possible combinations of tools, which are to be run by the pipeline" + }, + "pathway": { + "type": "string", + "fa_icon": "fas fa-border-all", + "description": "(experimental only): Choose a (list of) pathway from those predefined in the tool sheet", + "help_text": "Choose a subset of pathways to run. Pathways are defined in the tool sheet." 
} } }, @@ -1298,6 +1312,9 @@ { "$ref": "#/definitions/input_output_options" }, + { + "$ref": "#/definitions/experimental_analysis_options" + }, { "$ref": "#/definitions/abundance_values" }, diff --git a/nextflow_schema_coda.json b/nextflow_schema_coda.json deleted file mode 100644 index cba300f4..00000000 --- a/nextflow_schema_coda.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema", - "title": "nf-core/testpipeline pipeline parameters", - "description": "this is a test", - "type": "object", - "definitions": { - "tools_options": { - "title": "Tools options", - "type": "object", - "description": "Define where the pipeline should find input data", - "required": ["tools"], - "properties": { - "tools": { - "type": "string", - "format": "file-path", - "mimetype": "text/csv", - "schema": "assets/schema_tools.json", - "pattern": "^\\S+\\.(csv|tsv|yaml)$", - "description": "Path to comma-separated file containing samplesheet", - "help_text": "this is just a test" - } - } - } - } -} diff --git a/subworkflows/local/correlation/main.nf b/subworkflows/local/correlation/main.nf index 96a6b890..b586f369 100644 --- a/subworkflows/local/correlation/main.nf +++ b/subworkflows/local/correlation/main.nf @@ -1,8 +1,8 @@ - -// include nf-core modules +// +// Perform correlation analysis +// include {PROPR_PROPR as PROPR} from "../../../modules/nf-core/propr/propr/main.nf" - workflow CORRELATION { take: ch_counts @@ -17,30 +17,24 @@ workflow CORRELATION { [ metacounts+metatools, counts ] } .branch { - propr: it[0]["cor_method"] == "propr" + propr: it[0]["cor_method"] == "propr" } .set { ch_counts_cor } - // Hacer un branch del channel para coger las counts normales cuando no hay variable selection - + // Create a branch of the channel to retrieve the normal counts when there is no variable selection. 
ch_counts_cor.propr .branch{ - no_sel: it[0]["sel_method"] == null - sel: it[0]["sel_method"] != null + no_sel: it[0]["sel_method"] == null + sel: it[0]["sel_method"] != null } .set { ch_counts_selection } - //ch_counts_selection.no_sel.view() - //ch_counts_filtered.view() - ch_propr = ch_counts_filtered.mix(ch_counts_selection.no_sel) - //ch_propr.view() PROPR(ch_propr) ch_matrix = PROPR.out.matrix ch_adjacency = PROPR.out.adj - emit: matrix = ch_matrix adjacency = ch_adjacency diff --git a/subworkflows/local/differential/main.nf b/subworkflows/local/differential/main.nf index 296fe0f2..57451cd2 100644 --- a/subworkflows/local/differential/main.nf +++ b/subworkflows/local/differential/main.nf @@ -1,7 +1,8 @@ -// include modules +// +// Perform differential analysis +// include {PROPR_PROPD as PROPD} from "../../../modules/nf-core/propr/propd/main.nf" - workflow DIFFERENTIAL { take: ch_counts @@ -15,9 +16,8 @@ workflow DIFFERENTIAL { metacounts, counts, meta -> [ metacounts+meta, counts ] } - //.view() .branch { - propd: it[0]["diff_method"] == "propd" + propd: it[0]["diff_method"] == "propd" deseq2: it[0]["diff_method"] == "deseq2" } .set { ch_counts_tools } diff --git a/subworkflows/local/enrichment/main.nf b/subworkflows/local/enrichment/main.nf index a559a41b..4f3ae992 100644 --- a/subworkflows/local/enrichment/main.nf +++ b/subworkflows/local/enrichment/main.nf @@ -1,19 +1,17 @@ -// include modules - +// +// Perform enrichment analysis +// include { PROPR_GREA as GREA_DIFF } from "../../../modules/nf-core/propr/grea/main.nf" include { PROPR_GREA as GREA_COR } from "../../../modules/nf-core/propr/grea/main.nf" include { MYGENE } from "../../../modules/nf-core/mygene/main.nf" - workflow ENRICHMENT { take: ch_diff_adjacency ch_cor_adjacency ch_counts - main: - MYGENE(ch_counts) ch_gmt = MYGENE.out.gmt @@ -39,7 +37,6 @@ workflow ENRICHMENT { GREA_COR(ch_cor_grea.grea, ch_gmt.collect()) ch_enriched_cor = GREA_COR.out.enrichedGO - emit: enriched_diff = 
ch_enriched_diff enriched_cor = ch_enriched_cor diff --git a/subworkflows/local/experimental/main.nf b/subworkflows/local/experimental/main.nf index 6e811027..e2b1dbbf 100644 --- a/subworkflows/local/experimental/main.nf +++ b/subworkflows/local/experimental/main.nf @@ -1,5 +1,6 @@ -// include subworkflows - +// +// Run experimental analysis +// include { CORRELATION } from '../correlation/main.nf' include { DIFFERENTIAL } from '../differential/main.nf' include { VARIABLE_SELECTION } from '../variable_selection/main.nf' @@ -13,7 +14,7 @@ workflow EXPERIMENTAL { main: - // Dividir el ch_samples_and_matrix en un channel de samplesheet y otro de matrix (PROPD los coge por separado) + // Split the ch_samples_and_matrix into one channel for the samplesheet and another for the matrix (PROPD takes them separately). ch_samples_and_matrix .map { meta, samplesheet, counts -> @@ -27,7 +28,6 @@ workflow EXPERIMENTAL { [ meta, counts ] } .set { ch_counts } - // ch_counts.view() ch_counts .combine(ch_tools) @@ -41,7 +41,6 @@ workflow EXPERIMENTAL { ch_out = Channel.empty() // Perform differential analysis - DIFFERENTIAL(ch_counts, ch_tools, ch_samplesheet.collect()) ch_diff_results = DIFFERENTIAL.out.results ch_diff_adjacency = DIFFERENTIAL.out.adjacency @@ -49,17 +48,12 @@ workflow EXPERIMENTAL { // Perform variable selection ch_counts_filtered = VARIABLE_SELECTION(ch_diff_adjacency, ch_counts) - //VARIABLE_SELECTION.out.count.view() - // Perform correlation analysis CORRELATION(ch_counts, ch_tools, ch_counts_filtered) ch_matrix = CORRELATION.out.matrix ch_cor_adjacency = CORRELATION.out.adjacency ch_out.mix(ch_matrix) - //ch_diff_adjacency.view() - //ch_cor_adjacency.view() - // Perform enrichment analysis ENRICHMENT(ch_diff_adjacency, ch_cor_adjacency, ch_counts) ch_enriched_cor = ENRICHMENT.out.enriched_cor @@ -67,7 +61,6 @@ workflow EXPERIMENTAL { ch_out.mix(ch_enriched_diff, ch_enriched_cor) - emit: output = ch_out } diff --git 
a/subworkflows/local/variable_selection/main.nf b/subworkflows/local/variable_selection/main.nf index 71c8d6d9..22b35527 100644 --- a/subworkflows/local/variable_selection/main.nf +++ b/subworkflows/local/variable_selection/main.nf @@ -1,11 +1,12 @@ -// include modules +// +// Perform variable selection +// include { FILTERVAR } from "../../../modules/local/filtervar/main.nf" workflow VARIABLE_SELECTION { take: - ch_adj//meta_tools, adj - ch_counts //meta_id, counts - + ch_adj //meta_tools, adj + ch_counts //meta_id, counts main: ch_counts @@ -14,26 +15,19 @@ workflow VARIABLE_SELECTION { [counts] } .combine(ch_adj) - //.view() .map{ counts, meta, adj -> [ meta, counts, adj] } - //.view() .branch { filtervar: it[0]["sel_method"] == "filtervar" deseqfilter: it[0]["sel_method"] == "deseqfilter" } .set { ch_counts_adj_sel } - //ch_counts_adj_sel.nofilter.view() - - FILTERVAR(ch_counts_adj_sel.filtervar) ch_counts_cor = FILTERVAR.out.count - //ch_counts_cor.view() - emit: count = ch_counts_cor diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 98e2772a..7a76c530 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -46,7 +46,7 @@ if (params.study_type == 'affy_array'){ // If this is not microarray data or maxquant output, and this an RNA-seq dataset, // then assume we're reading from a matrix - if (params.study_type == "rnaseq" && params.matrix) { + if (params.study_type in ["rnaseq", "experimental"] && params.matrix) { matrix_file = file(params.matrix, checkIfExists: true) ch_in_raw = Channel.of([ exp_meta, matrix_file]) } else { @@ -98,7 +98,9 @@ citations_file = file(params.citations_file, checkIfExists: true) */ include { TABULAR_TO_GSEA_CHIP } from '../modules/local/tabular_to_gsea_chip' -include { FILTER_DIFFTABLE } from '../modules/local/filter_difftable' +include { FILTER_DIFFTABLE } from '../modules/local/filter_difftable' +include { EXPERIMENTAL } from 
'../subworkflows/local/experimental/main.nf' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -132,6 +134,8 @@ include { GEOQUERY_GETGEO } from '../modules/n include { ZIP as MAKE_REPORT_BUNDLE } from '../modules/nf-core/zip/main' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { fromSamplesheet } from 'plugin/nf-validation' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -304,7 +308,7 @@ workflow DIFFERENTIALABUNDANCE { ch_norm = VALIDATOR.out.assays } - if(params.study_type != 'rnaseq') { + if(!params.study_type in ['rnaseq', 'experimental']) { ch_matrix_for_differential = ch_norm } else{ @@ -355,8 +359,35 @@ workflow DIFFERENTIALABUNDANCE { ch_processed_matrices = ch_norm .map{ it.tail() } .first() - } - else{ + } else if (params.study_type == 'experimental') { + + ch_samples_and_matrix = ch_input.combine(ch_in_raw.map{it[1]}) + ch_samples_and_matrix.view() + + // Convert the samplesheet.csv in a channel with the proper format + ch_tools = Channel.fromSamplesheet('tools') + + // TO DO: This should be modified to run one path per default, not all + if (params.pathway == "all") { + ch_tools + .set{ ch_tools_single } + } else { + ch_tools + .filter{ + it[0]["pathway_name"] == params.pathway // TO DO: change pathway to path also in the tools_samplesheet file + } + .set{ ch_tools_single } + } + ch_tools_single.view() + + EXPERIMENTAL(ch_samples_and_matrix, ch_tools_single) + EXPERIMENTAL.out.output.view() + + ch_norm = Channel.empty() + ch_differential = Channel.empty() + ch_processed_matrices = Channel.empty() + ch_model = Channel.empty() + } else { DESEQ2_NORM ( ch_contrasts.first(), From b21894065d2adccdf6f47e31724cba6f934149c7 Mon Sep 17 00:00:00 2001 From: bjlang <> Date: Tue, 27 Aug 2024 14:53:28 +0200 Subject: [PATCH 6/6] Do not put not not in the not wrong place! 
--- workflows/differentialabundance.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/differentialabundance.nf b/workflows/differentialabundance.nf index 7a76c530..b846dd46 100644 --- a/workflows/differentialabundance.nf +++ b/workflows/differentialabundance.nf @@ -43,7 +43,7 @@ if (params.study_type == 'affy_array'){ error("Query GSE not specified or features metadata columns not specified") } } else { - // If this is not microarray data or maxquant output, and this an RNA-seq dataset, + // If this is not microarray data or maxquant output, and this is an RNA-seq dataset or experimental analysis, // then assume we're reading from a matrix if (params.study_type in ["rnaseq", "experimental"] && params.matrix) { @@ -308,7 +308,7 @@ workflow DIFFERENTIALABUNDANCE { ch_norm = VALIDATOR.out.assays } - if(!params.study_type in ['rnaseq', 'experimental']) { + if(params.study_type !in ['rnaseq', 'experimental']) { ch_matrix_for_differential = ch_norm } else{