From 3d7afbbcbc1923e36be417aee5616f39ea3915ef Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Fri, 25 Aug 2023 15:55:20 +0100 Subject: [PATCH 01/14] seqkit statistics files combined into a single file --- functions/functions.nf | 29 +++++++++++++++++++++++++++++ workflows/sge.nf | 15 +++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 functions/functions.nf diff --git a/functions/functions.nf b/functions/functions.nf new file mode 100644 index 0000000..74f1ede --- /dev/null +++ b/functions/functions.nf @@ -0,0 +1,29 @@ +// +// takes channel and SEQUENCING_QC workflow object +// extracts seqkit_stats output channel, combines it with workflow name and appends to input channel +// +def add_seqkit_stats(channel, workflow) { + return channel.mix( + workflow.out.seqkit_stats.combine( + [workflow.name.split(':').last()] + ) + ) +} + +// +// each seqkit stat file prepends with two columns for sample and stage +// +def modify_seqkit_stats(meta, path, stage) { + newLines = [] + file(path) + .readLines() + .eachWithIndex { it, i -> + if (i == 0) { + line = "sample" + "\t" + "stage" + "\t" + it + } else { + line = meta.id + "\t" + stage + "\t" + it + } + newLines.add(line) + } + return newLines.join("\n") + "\n" +} diff --git a/workflows/sge.nf b/workflows/sge.nf index 6e99666..5c40e94 100644 --- a/workflows/sge.nf +++ b/workflows/sge.nf @@ -182,6 +182,11 @@ include { SEQUENCING_QC as RAW_SEQUENCING_QC; // include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: multiqc_options ) +// +// FUNCTIONS: collection of custom functions +// +include { add_seqkit_stats; modify_seqkit_stats } from '../functions/functions.nf' + /* ======================================================================================== RUN MAIN WORKFLOW @@ -194,6 +199,7 @@ def multiqc_report = [] workflow SGE { // Set up empty channels ch_software_versions = Channel.empty() + seqkit_stat_ch = Channel.empty() if (params.input_type == 'cram') { // @@ -224,6 +230,7 @@ workflow SGE { ch_raw_read_qc = ch_raw_reads.map{it -> [[id: it[0].id + '_raw', single_end: it[0].single_end], it[1]]} RAW_SEQUENCING_QC ( ch_raw_read_qc ) ch_software_versions = ch_software_versions.mix(RAW_SEQUENCING_QC.out.fastqc_version, RAW_SEQUENCING_QC.out.seqkit_version) + seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, RAW_SEQUENCING_QC) } // @@ -240,6 +247,7 @@ workflow SGE { ch_adapter_trimming_qc = ADAPTER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_adapter_trimmed', single_end: it[0].single_end], it[1]]} ADAPTER_TRIMMED_SEQUENCING_QC ( ch_adapter_trimming_qc ) ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMED_SEQUENCING_QC.out.fastqc_version, ADAPTER_TRIMMED_SEQUENCING_QC.out.seqkit_version) + seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, ADAPTER_TRIMMED_SEQUENCING_QC) } // Send to next stage ch_primer_trim = ADAPTER_TRIMMING.out.reads @@ -261,6 +269,7 @@ workflow SGE { ch_primer_trimming_qc = PRIMER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_primer_trimmed', single_end: it[0].single_end], it[1]]} PRIMER_TRIMMED_SEQUENCING_QC ( ch_primer_trimming_qc ) ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMED_SEQUENCING_QC.out.fastqc_version, PRIMER_TRIMMED_SEQUENCING_QC.out.seqkit_version) + seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, PRIMER_TRIMMED_SEQUENCING_QC) } // Send to next stage ch_read_merge = PRIMER_TRIMMING.out.reads @@ -283,6 +292,7 @@ workflow SGE { ch_merged_read_qc = ch_read_transform MERGED_SEQUENCING_QC ( ch_merged_read_qc ) ch_software_versions = ch_software_versions.mix(MERGED_SEQUENCING_QC.out.fastqc_version, MERGED_SEQUENCING_QC.out.seqkit_version) + seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, MERGED_SEQUENCING_QC) } } else { ch_read_transform = ch_read_merge @@ -316,6 +326,7 @@ workflow SGE { ch_filtered_read_qc = READ_FILTERING.out.reads.map{it -> [[id: it[0].id + '_filtered', single_end: true], it[1]]} FILTERED_SEQUENCING_QC ( ch_filtered_read_qc ) ch_software_versions = ch_software_versions.mix(FILTERED_SEQUENCING_QC.out.fastqc_version, FILTERED_SEQUENCING_QC.out.seqkit_version) + seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, FILTERED_SEQUENCING_QC) } } else { ch_reads_to_modify = ch_read_filter @@ -361,6 +372,10 @@ workflow SGE { ch_software_versions.map { it }.collect() ) + seqkit_stat_ch + .map { meta, file, stage -> modify_seqkit_stats(meta, file, stage) } + .collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: params.outdir) + // // MODULE: MultiQC // From 771f4f94e60a9ee0a8e29cacc5337a9c5e3107b3 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Fri, 25 Aug 2023 15:57:33 +0100 Subject: [PATCH 02/14] linting fixed --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a8bc37..d2cd911 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,8 +37,8 @@ Initial release of QUANTS, created with the [nf-core](https://nf-co.re/) templat ## 3.0.0.0 - [21st August 2023] * Split read trimming into two stages - * Adapter trimming - removes user-defined adapter sequences and takes forward both trimmed and untrimmed reads - * Primer trimming - removes user-defined primer sequences and takes forward only trimmed reads + * Adapter trimming - removes user-defined adapter sequences and takes forward both trimmed and untrimmed reads + * Primer trimming - removes user-defined primer sequences and takes forward only trimmed reads * Add a read modification process which can append user-defined sequences to trimmed reads * Add library transformer to allow users to provide libraries in a different format (e.g. the meta CSV from VaLiAnT) and convert it for use with pyQUEST From 0dc96d8793866701a59df8651b5985a99fd45fe7 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Thu, 14 Sep 2023 10:16:39 +0100 Subject: [PATCH 03/14] sample name trimming added --- functions/functions.nf | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/functions/functions.nf b/functions/functions.nf index 74f1ede..c170274 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -10,10 +10,25 @@ def add_seqkit_stats(channel, workflow) { ) } +// +// removes stage suffix from the sample name +// +def trim_sample_name(sample_name) { + sample_name + .replaceFirst(/_raw$/, "") + .replaceFirst(/_primer_trimmed$/, "") + .replaceFirst(/_adapter_trimmed$/, "") + .replaceFirst(/_merged$/, "") + .replaceFirst(/_merged_filtered$/, "") +} + // // each seqkit stat file prepends with two columns for sample and stage // def modify_seqkit_stats(meta, path, stage) { + // TODO should be removed in the future once sample name handling in the pipeline is consistent + def sample_name = trim_sample_name(meta.id) + newLines = [] file(path) .readLines() @@ -21,9 +36,10 @@ def modify_seqkit_stats(meta, path, stage) { if (i == 0) { line = "sample" + "\t" + "stage" + "\t" + it } else { - line = meta.id + "\t" + stage + "\t" + it + line = sample_name + "\t" + stage + "\t" + it } newLines.add(line) } + return newLines.join("\n") + "\n" } From e99412374d66a32fe5e78d089b0869d0e483bdf9 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Thu, 14 Sep 2023 10:23:39 +0100 Subject: [PATCH 04/14] nf-core linting disabled --- .github/workflows/linting.yml | 1 + CHANGELOG.md | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 5b3d55e..5f3f4ae 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -99,6 +99,7 @@ jobs: allow-repeats: false nf-core: + if: false runs-on: ubuntu-latest steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index d2cd911..71c2d47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,4 +44,4 @@ Initial release of QUANTS, created with the [nf-core](https://nf-co.re/) templat ## 3.0.0.1 - [12th September 2023] -* Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files \ No newline at end of file +* Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files From 608c1efd5fda5c7c91eec50c7bb73ca2f0dc9e7e Mon Sep 17 00:00:00 2001 From: Victoria Offord <vo1@sanger.ac.uk> Date: Thu, 14 Sep 2023 20:55:22 +0100 Subject: [PATCH 05/14] write collated seqkit stats file to seqkit_stats subdirectory --- workflows/sge.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/sge.nf b/workflows/sge.nf index 5c40e94..51678c3 100644 --- a/workflows/sge.nf +++ b/workflows/sge.nf @@ -374,7 +374,7 @@ workflow SGE { seqkit_stat_ch .map { meta, file, stage -> modify_seqkit_stats(meta, file, stage) } - .collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: params.outdir) + .collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: "${params.outdir}/seqkit_stats") // // MODULE: MultiQC From 20f905272f021f60c681de7602e53779df898b5c Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Tue, 29 Aug 2023 14:01:10 +0100 Subject: [PATCH 06/14] cutadapt json collation --- functions/functions.nf | 27 ++++++++++++++++++++++++++ modules/local/cutadapt/main.nf | 6 +++--- subworkflows/local/adapter_trimming.nf | 2 +- subworkflows/local/primer_trimming.nf | 2 +- workflows/sge.nf | 11 +++++++++++ 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/functions/functions.nf b/functions/functions.nf index c170274..a640f60 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -1,3 +1,6 @@ +import groovy.json.JsonSlurper +import groovy.json.JsonOutput + // // takes channel and SEQUENCING_QC workflow object // extracts seqkit_stats output channel, combines it with workflow name and appends to input channel @@ -42,4 +45,28 @@ def modify_seqkit_stats(meta, path, stage) { } return newLines.join("\n") + "\n" + +def compose_cutadapt_jsons(meta, pathList) { + def jsonSlurper = new JsonSlurper() + def record = [:] + + for (path in pathList) { + def stage = path.name.split("\\.")[-3] + def object = jsonSlurper.parse(path) + record[stage] = object + } + + record = [(meta.id): record] + return record +} + +def collate_cutadapt_jsons(jsonList) { + def output = [:] + + for (json in jsonList) { + output.putAll(json) + } + + def output_string = JsonOutput.toJson(output) + return output_string } diff --git a/modules/local/cutadapt/main.nf b/modules/local/cutadapt/main.nf index 102dbbb..0afe801 100644 --- a/modules/local/cutadapt/main.nf +++ b/modules/local/cutadapt/main.nf @@ -24,9 +24,9 @@ process CUTADAPT { output: tuple val(meta), path('*_trimmed{,_1,_2}.fastq.gz') , emit: reads tuple val(meta), path('*_untrimmed{,_1,_2}.fastq.gz'), emit: untrimmed_reads, optional: true - tuple val(meta), path('*.log') , emit: log - tuple val(meta), path('*.json') , emit: json - path '*.version.txt' , emit: version + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.json') , emit: json + path '*.version.txt' , emit: version script: def software = getSoftwareName(task.process) diff --git a/subworkflows/local/adapter_trimming.nf b/subworkflows/local/adapter_trimming.nf index 567dc4c..8081323 100644 --- a/subworkflows/local/adapter_trimming.nf +++ b/subworkflows/local/adapter_trimming.nf @@ -27,7 +27,7 @@ workflow ADAPTER_TRIMMING { CUTADAPT_ADAPTER ( reads ) ch_trimmed_reads = CUTADAPT_ADAPTER.out.reads - ch_trimmed_stats = CUTADAPT_ADAPTER.out.log + ch_trimmed_stats = CUTADAPT_ADAPTER.out.json } emit: reads = ch_trimmed_reads diff --git a/subworkflows/local/primer_trimming.nf b/subworkflows/local/primer_trimming.nf index a684019..e7ec10d 100644 --- a/subworkflows/local/primer_trimming.nf +++ b/subworkflows/local/primer_trimming.nf @@ -27,7 +27,7 @@ workflow PRIMER_TRIMMING { CUTADAPT_PRIMER ( reads ) ch_trimmed_reads = CUTADAPT_PRIMER.out.reads - ch_trimmed_stats = CUTADAPT_PRIMER.out.log + ch_trimmed_stats = CUTADAPT_PRIMER.out.json } emit: reads = ch_trimmed_reads diff --git a/workflows/sge.nf b/workflows/sge.nf index 51678c3..02c73c0 100644 --- a/workflows/sge.nf +++ b/workflows/sge.nf @@ -186,6 +186,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: m // FUNCTIONS: collection of custom functions // include { add_seqkit_stats; modify_seqkit_stats } from '../functions/functions.nf' +include { compose_cutadapt_jsons; collate_cutadapt_jsons } from '../functions/functions.nf' /* ======================================================================================== @@ -200,6 +201,7 @@ workflow SGE { // Set up empty channels ch_software_versions = Channel.empty() seqkit_stat_ch = Channel.empty() + cutadapt_jsons_ch = Channel.empty() if (params.input_type == 'cram') { // @@ -240,6 +242,7 @@ workflow SGE { // Run adapter trimming ADAPTER_TRIMMING ( ch_adapter_trim ) ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMING.out.versions) + cutadapt_jsons_ch = cutadapt_jsons_ch.mix(ADAPTER_TRIMMING.out.stats) // //SUBWORKFLOW: Run FASTQC on adapter trimmed reads // @@ -262,6 +265,7 @@ workflow SGE { // Run primer trimming PRIMER_TRIMMING ( ch_primer_trim ) ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMING.out.versions) + cutadapt_jsons_ch = cutadapt_jsons_ch.mix(PRIMER_TRIMMING.out.stats) // //SUBWORKFLOW: Run FASTQC on primer trimmed reads // @@ -376,6 +380,13 @@ workflow SGE { .map { meta, file, stage -> modify_seqkit_stats(meta, file, stage) } .collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: "${params.outdir}/seqkit_stats") + cutadapt_jsons_ch + .groupTuple() + .map { meta, fileList -> compose_cutadapt_jsons(meta, fileList) } + .toList() + .map { collate_cutadapt_jsons(it) } + .collectFile(name: 'cutadapt.json', storeDir: params.outdir) + // // MODULE: MultiQC // From 6447676105dda75bc6917807e40c1f630a684965 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Tue, 29 Aug 2023 16:16:51 +0100 Subject: [PATCH 07/14] stage no more depends on input file name --- functions/functions.nf | 24 +++++++++++++++++++++--- workflows/sge.nf | 7 ++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/functions/functions.nf b/functions/functions.nf index a640f60..b127c56 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -46,12 +46,27 @@ def modify_seqkit_stats(meta, path, stage) { return newLines.join("\n") + "\n" -def compose_cutadapt_jsons(meta, pathList) { + +// +// takes channel and workflow object +// extracts desired output channel from workflow, combines it with workflow name and appends to input channel +// +def add_stats_with_stage(channel, workflow, out_channel = 'stats') { + return channel.mix( + workflow.out.getProperty(out_channel).combine( + [workflow.name.split(':').last()] + ) + ) +} + +// +// takes cutadapt json filenames for the sample and creates a record +// +def compose_cutadapt_jsons(meta, pathList, stageList) { def jsonSlurper = new JsonSlurper() def record = [:] - for (path in pathList) { - def stage = path.name.split("\\.")[-3] + [pathList, stageList].transpose().each() { path, stage -> def object = jsonSlurper.parse(path) record[stage] = object } @@ -60,6 +75,9 @@ def compose_cutadapt_jsons(meta, pathList) { return record } +// +// takes a list of map-objects and combines them into one json string +// def collate_cutadapt_jsons(jsonList) { def output = [:] diff --git a/workflows/sge.nf b/workflows/sge.nf index 02c73c0..2a34d1a 100644 --- a/workflows/sge.nf +++ b/workflows/sge.nf @@ -186,6 +186,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: m // FUNCTIONS: collection of custom functions // include { add_seqkit_stats; modify_seqkit_stats } from '../functions/functions.nf' +include { add_stats_with_stage } from '../functions/functions.nf' include { compose_cutadapt_jsons; collate_cutadapt_jsons } from '../functions/functions.nf' /* @@ -242,7 +243,7 @@ workflow SGE { // Run adapter trimming ADAPTER_TRIMMING ( ch_adapter_trim ) ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMING.out.versions) - cutadapt_jsons_ch = cutadapt_jsons_ch.mix(ADAPTER_TRIMMING.out.stats) + cutadapt_jsons_ch = add_stats_with_stage(cutadapt_jsons_ch, ADAPTER_TRIMMING, 'stats') // //SUBWORKFLOW: Run FASTQC on adapter trimmed reads // @@ -265,7 +266,7 @@ workflow SGE { // Run primer trimming PRIMER_TRIMMING ( ch_primer_trim ) ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMING.out.versions) - cutadapt_jsons_ch = cutadapt_jsons_ch.mix(PRIMER_TRIMMING.out.stats) + cutadapt_jsons_ch = add_stats_with_stage(cutadapt_jsons_ch, PRIMER_TRIMMING, 'stats') // //SUBWORKFLOW: Run FASTQC on primer trimmed reads // @@ -382,7 +383,7 @@ workflow SGE { cutadapt_jsons_ch .groupTuple() - .map { meta, fileList -> compose_cutadapt_jsons(meta, fileList) } + .map { meta, fileList, stageList -> compose_cutadapt_jsons(meta, fileList, stageList) } .toList() .map { collate_cutadapt_jsons(it) } .collectFile(name: 'cutadapt.json', storeDir: params.outdir) From 53cdea8553bbe245de6e1d756530604926e68677 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Tue, 29 Aug 2023 16:48:29 +0100 Subject: [PATCH 08/14] calculation of percent of reads with adapters added --- functions/functions.nf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/functions/functions.nf b/functions/functions.nf index b127c56..e648ce1 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -68,6 +68,12 @@ def compose_cutadapt_jsons(meta, pathList, stageList) { [pathList, stageList].transpose().each() { path, stage -> def object = jsonSlurper.parse(path) + object["read_counts"]["read1_with_adapter_percent"] = 100 * object["read_counts"]["read1_with_adapter"] / object["read_counts"]["input"] + if (object["read_counts"]["read2_with_adapter"]){ + object["read_counts"]["read2_with_adapter_percent"] = 100 * object["read_counts"]["read2_with_adapter"] / object["read_counts"]["input"] + } else { + object["read_counts"]["read2_with_adapter_percent"] = null + } record[stage] = object } From 1a771b842f36d6db7650b934b2a89e2ed63c623e Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Wed, 30 Aug 2023 14:59:20 +0100 Subject: [PATCH 09/14] fix https://github.com/nextflow-io/nextflow/issues/1698 --- functions/functions.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/functions/functions.nf b/functions/functions.nf index e648ce1..0b7cde6 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -51,7 +51,7 @@ def modify_seqkit_stats(meta, path, stage) { // takes channel and workflow object // extracts desired output channel from workflow, combines it with workflow name and appends to input channel // -def add_stats_with_stage(channel, workflow, out_channel = 'stats') { +def add_stats_with_stage(channel, workflow, out_channel) { return channel.mix( workflow.out.getProperty(out_channel).combine( [workflow.name.split(':').last()] From 9541de027d10f2c5e0601762ca605880033b1fd9 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Thu, 31 Aug 2023 10:53:09 +0100 Subject: [PATCH 10/14] json collation moved to separate process --- functions/functions.nf | 44 +------------------ .../cutadapt_json_collation/functions.nf | 25 +++++++++++ modules/local/cutadapt_json_collation/main.nf | 33 ++++++++++++++ .../local/cutadapt_json_collation/meta.yml | 17 +++++++ workflows/sge.nf | 12 +++-- 5 files changed, 82 insertions(+), 49 deletions(-) create mode 100644 modules/local/cutadapt_json_collation/functions.nf create mode 100644 modules/local/cutadapt_json_collation/main.nf create mode 100644 modules/local/cutadapt_json_collation/meta.yml diff --git a/functions/functions.nf b/functions/functions.nf index 0b7cde6..d405b83 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -1,6 +1,3 @@ -import groovy.json.JsonSlurper -import groovy.json.JsonOutput - // // takes channel and SEQUENCING_QC workflow object // extracts seqkit_stats output channel, combines it with workflow name and appends to input channel @@ -46,51 +43,14 @@ def modify_seqkit_stats(meta, path, stage) { return newLines.join("\n") + "\n" - // -// takes channel and workflow object +// takes channel, workflow object and name of output channel // extracts desired output channel from workflow, combines it with workflow name and appends to input channel // -def add_stats_with_stage(channel, workflow, out_channel) { +def add_stats_with_stage(channel, workflow, String out_channel) { return channel.mix( workflow.out.getProperty(out_channel).combine( [workflow.name.split(':').last()] ) ) } - -// -// takes cutadapt json filenames for the sample and creates a record -// -def compose_cutadapt_jsons(meta, pathList, stageList) { - def jsonSlurper = new JsonSlurper() - def record = [:] - - [pathList, stageList].transpose().each() { path, stage -> - def object = jsonSlurper.parse(path) - object["read_counts"]["read1_with_adapter_percent"] = 100 * object["read_counts"]["read1_with_adapter"] / object["read_counts"]["input"] - if (object["read_counts"]["read2_with_adapter"]){ - object["read_counts"]["read2_with_adapter_percent"] = 100 * object["read_counts"]["read2_with_adapter"] / object["read_counts"]["input"] - } else { - object["read_counts"]["read2_with_adapter_percent"] = null - } - record[stage] = object - } - - record = [(meta.id): record] - return record -} - -// -// takes a list of map-objects and combines them into one json string -// -def collate_cutadapt_jsons(jsonList) { - def output = [:] - - for (json in jsonList) { - output.putAll(json) - } - - def output_string = JsonOutput.toJson(output) - return output_string -} diff --git a/modules/local/cutadapt_json_collation/functions.nf b/modules/local/cutadapt_json_collation/functions.nf new file mode 100644 index 0000000..f2401f1 --- /dev/null +++ b/modules/local/cutadapt_json_collation/functions.nf @@ -0,0 +1,25 @@ +import groovy.json.JsonSlurper + +// +// takes cutadapt json filenames and stages for the sample and creates a record +// +def compose_cutadapt_jsons(meta, pathList, stageList) { + def jsonSlurper = new JsonSlurper() + def record = [:] + + [pathList, stageList].transpose().each() { path, stage -> + def object = jsonSlurper.parse(path) + + object["read_counts"]["read1_with_adapter_percent"] = 100 * object["read_counts"]["read1_with_adapter"] / object["read_counts"]["input"] + if (object["read_counts"]["read2_with_adapter"]){ + object["read_counts"]["read2_with_adapter_percent"] = 100 * object["read_counts"]["read2_with_adapter"] / object["read_counts"]["input"] + } else { + object["read_counts"]["read2_with_adapter_percent"] = null + } + + record[stage] = object + } + + record = [(meta.id): record] + return record +} diff --git a/modules/local/cutadapt_json_collation/main.nf b/modules/local/cutadapt_json_collation/main.nf new file mode 100644 index 0000000..2418aa0 --- /dev/null +++ b/modules/local/cutadapt_json_collation/main.nf @@ -0,0 +1,33 @@ +import groovy.json.JsonOutput + +// Import generic module functions +include { compose_cutadapt_jsons } from './functions' + +process COLLATE_CUTADAPT_JSONS { + label 'process_low' + publishDir "${params.outdir}", mode: params.publish_dir_mode + + input: + val inputList // list of tuples [meta, [list of jsons], [list of stages]] + + output: + path 'cutadapt.json', emit: json + + exec: + String filename = [task.workDir, 'cutadapt.json'].join(File.separator) + + new File(filename).withWriter { writer -> + writer.writeLine('{') + + inputList.eachWithIndex { e, index -> + def (meta, pathList, stageList) = e + def record = compose_cutadapt_jsons(meta, pathList, stageList) + String record_string = JsonOutput.toJson(record) + String comma = index + 1 < inputList.size() ? ',' : '' + String output_string = ' ' + record_string[1..-2] + comma + writer.writeLine(output_string) + } + + writer.writeLine('}') + } +} diff --git a/modules/local/cutadapt_json_collation/meta.yml b/modules/local/cutadapt_json_collation/meta.yml new file mode 100644 index 0000000..b7cfade --- /dev/null +++ b/modules/local/cutadapt_json_collation/meta.yml @@ -0,0 +1,17 @@ +name: cutadapt_json_collation +description: Collate all cutadapt output jsons into one file +keywords: + - cutadapt +input: + - inputList: + type: list + description: | + Groovy list containing tuples of three objects: + meta, list of cutadapt jsons, list of stages +output: + - json: + type: file + description: collated cutadapt json file for all samples + pattern: "cutadapt.json" +authors: + - "@y-popov" diff --git a/workflows/sge.nf b/workflows/sge.nf index 2a34d1a..bc50ab7 100644 --- a/workflows/sge.nf +++ b/workflows/sge.nf @@ -175,6 +175,7 @@ include { SEQUENCING_QC as RAW_SEQUENCING_QC; SEQUENCING_QC as PRIMER_TRIMMED_SEQUENCING_QC; SEQUENCING_QC as FILTERED_SEQUENCING_QC } from '../subworkflows/local/sequencing_qc' addParams( options: [:] ) +include { COLLATE_CUTADAPT_JSONS } from '../modules/local/cutadapt_json_collation/main.nf' addParams( options: [:] ) // editorconfig-checker-disable // @@ -187,7 +188,6 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: m // include { add_seqkit_stats; modify_seqkit_stats } from '../functions/functions.nf' include { add_stats_with_stage } from '../functions/functions.nf' -include { compose_cutadapt_jsons; collate_cutadapt_jsons } from '../functions/functions.nf' /* ======================================================================================== @@ -381,12 +381,10 @@ workflow SGE { .map { meta, file, stage -> modify_seqkit_stats(meta, file, stage) } .collectFile(keepHeader: true, name: 'seqkit_stats.tsv', storeDir: "${params.outdir}/seqkit_stats") - cutadapt_jsons_ch - .groupTuple() - .map { meta, fileList, stageList -> compose_cutadapt_jsons(meta, fileList, stageList) } - .toList() - .map { collate_cutadapt_jsons(it) } - .collectFile(name: 'cutadapt.json', storeDir: params.outdir) + cutadapt_jsons_ch + .groupTuple() + .toList() + | COLLATE_CUTADAPT_JSONS // // MODULE: MultiQC From de03feac9f54d1c6b2f9aca6a87d9221dd2f8792 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Fri, 15 Sep 2023 14:37:41 +0100 Subject: [PATCH 11/14] add_seqkit_stats replaced by add_stats_with_stage --- functions/functions.nf | 19 ++++--------------- .../local/cutadapt_json_collation/meta.yml | 2 +- workflows/sge.nf | 12 ++++++------ 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/functions/functions.nf b/functions/functions.nf index d405b83..e30d8d3 100644 --- a/functions/functions.nf +++ b/functions/functions.nf @@ -1,10 +1,10 @@ // -// takes channel and SEQUENCING_QC workflow object -// extracts seqkit_stats output channel, combines it with workflow name and appends to input channel +// takes channel, workflow object and name of output channel +// extracts desired output channel from workflow, combines it with workflow name and appends to input channel // -def add_seqkit_stats(channel, workflow) { +def add_stats_with_stage(channel, workflow, String out_channel) { return channel.mix( - workflow.out.seqkit_stats.combine( + workflow.out.getProperty(out_channel).combine( [workflow.name.split(':').last()] ) ) @@ -42,15 +42,4 @@ def modify_seqkit_stats(meta, path, stage) { } return newLines.join("\n") + "\n" - -// -// takes channel, workflow object and name of output channel -// extracts desired output channel from workflow, combines it with workflow name and appends to input channel -// -def add_stats_with_stage(channel, workflow, String out_channel) { - return channel.mix( - workflow.out.getProperty(out_channel).combine( - [workflow.name.split(':').last()] - ) - ) } diff --git a/modules/local/cutadapt_json_collation/meta.yml b/modules/local/cutadapt_json_collation/meta.yml index b7cfade..238059f 100644 --- a/modules/local/cutadapt_json_collation/meta.yml +++ b/modules/local/cutadapt_json_collation/meta.yml @@ -6,7 +6,7 @@ input: - inputList: type: list description: | - Groovy list containing tuples of three objects: + Groovy list containing tuples of three objects: meta, list of cutadapt jsons, list of stages output: - json: diff --git a/workflows/sge.nf b/workflows/sge.nf index bc50ab7..7e9279f 100644 --- a/workflows/sge.nf +++ b/workflows/sge.nf @@ -186,7 +186,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' addParams( options: m // // FUNCTIONS: collection of custom functions // -include { add_seqkit_stats; modify_seqkit_stats } from '../functions/functions.nf' +include { modify_seqkit_stats } from '../functions/functions.nf' include { add_stats_with_stage } from '../functions/functions.nf' /* @@ -233,7 +233,7 @@ workflow SGE { ch_raw_read_qc = ch_raw_reads.map{it -> [[id: it[0].id + '_raw', single_end: it[0].single_end], it[1]]} RAW_SEQUENCING_QC ( ch_raw_read_qc ) ch_software_versions = ch_software_versions.mix(RAW_SEQUENCING_QC.out.fastqc_version, RAW_SEQUENCING_QC.out.seqkit_version) - seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, RAW_SEQUENCING_QC) + seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, RAW_SEQUENCING_QC, 'seqkit_stats') } // @@ -251,7 +251,7 @@ workflow SGE { ch_adapter_trimming_qc = ADAPTER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_adapter_trimmed', single_end: it[0].single_end], it[1]]} ADAPTER_TRIMMED_SEQUENCING_QC ( ch_adapter_trimming_qc ) ch_software_versions = ch_software_versions.mix(ADAPTER_TRIMMED_SEQUENCING_QC.out.fastqc_version, ADAPTER_TRIMMED_SEQUENCING_QC.out.seqkit_version) - seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, ADAPTER_TRIMMED_SEQUENCING_QC) + seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, ADAPTER_TRIMMED_SEQUENCING_QC, 'seqkit_stats') } // Send to next stage ch_primer_trim = ADAPTER_TRIMMING.out.reads @@ -274,7 +274,7 @@ workflow SGE { ch_primer_trimming_qc = PRIMER_TRIMMING.out.reads.map{it -> [[id: it[0].id + '_primer_trimmed', single_end: it[0].single_end], it[1]]} PRIMER_TRIMMED_SEQUENCING_QC ( ch_primer_trimming_qc ) ch_software_versions = ch_software_versions.mix(PRIMER_TRIMMED_SEQUENCING_QC.out.fastqc_version, PRIMER_TRIMMED_SEQUENCING_QC.out.seqkit_version) - seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, PRIMER_TRIMMED_SEQUENCING_QC) + seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, PRIMER_TRIMMED_SEQUENCING_QC, 'seqkit_stats') } // Send to next stage ch_read_merge = PRIMER_TRIMMING.out.reads @@ -297,7 +297,7 @@ workflow SGE { ch_merged_read_qc = ch_read_transform MERGED_SEQUENCING_QC ( ch_merged_read_qc ) ch_software_versions = ch_software_versions.mix(MERGED_SEQUENCING_QC.out.fastqc_version, MERGED_SEQUENCING_QC.out.seqkit_version) - seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, MERGED_SEQUENCING_QC) + seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, MERGED_SEQUENCING_QC, 'seqkit_stats') } } else { ch_read_transform = ch_read_merge @@ -331,7 +331,7 @@ workflow SGE { ch_filtered_read_qc = READ_FILTERING.out.reads.map{it -> [[id: it[0].id + '_filtered', single_end: true], it[1]]} FILTERED_SEQUENCING_QC ( ch_filtered_read_qc ) ch_software_versions = ch_software_versions.mix(FILTERED_SEQUENCING_QC.out.fastqc_version, FILTERED_SEQUENCING_QC.out.seqkit_version) - seqkit_stat_ch = add_seqkit_stats(seqkit_stat_ch, FILTERED_SEQUENCING_QC) + seqkit_stat_ch = add_stats_with_stage(seqkit_stat_ch, FILTERED_SEQUENCING_QC, 'seqkit_stats') } } else { ch_reads_to_modify = ch_read_filter From 601299a81813b8e681f285c32c428bc62f216fe5 Mon Sep 17 00:00:00 2001 From: Victoria Offord <vo1@sanger.ac.uk> Date: Sun, 17 Sep 2023 21:50:44 +0100 Subject: [PATCH 12/14] write collated output to cutadapt directory --- modules/local/cutadapt_json_collation/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/cutadapt_json_collation/main.nf b/modules/local/cutadapt_json_collation/main.nf index 2418aa0..1f693f3 100644 --- a/modules/local/cutadapt_json_collation/main.nf +++ b/modules/local/cutadapt_json_collation/main.nf @@ -5,7 +5,7 @@ include { compose_cutadapt_jsons } from './functions' process COLLATE_CUTADAPT_JSONS { label 'process_low' - publishDir "${params.outdir}", mode: params.publish_dir_mode + publishDir "${params.outdir}/cutadapt", mode: params.publish_dir_mode input: val inputList // list of tuples [meta, [list of jsons], [list of stages]] From 30526b5820d98d9cacff4a3cfbe96f79ae1cb028 Mon Sep 17 00:00:00 2001 From: Iaroslav Popov <ip13@sanger.ac.uk> Date: Mon, 18 Sep 2023 10:15:41 +0100 Subject: [PATCH 13/14] set COLLATE_CUTADAPT_JSONS default executor to 'local' https://github.com/cancerit/QUANTS/pull/17#issuecomment-1722558091 --- conf/base.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/conf/base.config b/conf/base.config index fd5740f..37b92b1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -54,4 +54,8 @@ process { errorStrategy = 'retry' maxRetries = 2 } + + withName:COLLATE_CUTADAPT_JSONS { + executor = 'local' + } } From 5079736bdea5f34c65737cd7c01a60d6dc7ac59b Mon Sep 17 00:00:00 2001 From: Victoria Offord <vo1@sanger.ac.uk> Date: Wed, 11 Oct 2023 21:13:52 +0100 Subject: [PATCH 14/14] update to version 3.0.0.2 --- CHANGELOG.md | 9 +++++++++ modules/local/pyquest/main.nf | 2 +- nextflow.config | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71c2d47..be30813 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -45,3 +45,12 @@ Initial release of QUANTS, created with the [nf-core](https://nf-co.re/) templat ## 3.0.0.1 - [12th September 2023] * Primer trimming - bugfix to ensure cutadapt splits reads into trimmed and untrimmed files + +## 3.0.0.2 - [11th October 2023] + +* Collation of cutadapt JSON results into single JSON file +* Collation of SeqKit statistics results into a single TSV file +* Update version of pyQUEST to version 1.1.0 + * Improved handling of 0-length reads + * Ability to extract top 50 library-independent counts as FASTA + diff --git a/modules/local/pyquest/main.nf b/modules/local/pyquest/main.nf index 28d6c5f..bbb78c8 100644 --- a/modules/local/pyquest/main.nf +++ b/modules/local/pyquest/main.nf @@ -18,7 +18,7 @@ process PYQUEST { container "quay.io/biocontainers/flash2:2.2.00--h5bf99c6_3" } */ - container "quay.io/wtsicgp/pyquest:1.0.0" + container "quay.io/wtsicgp/pyquest:1.1.0" input: tuple val(meta), path(reads) diff --git a/nextflow.config b/nextflow.config index d913cde..8d6c77c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -190,7 +190,7 @@ manifest { description = 'Analysis pipeline for saturation genome editing screens' mainScript = 'main.nf' nextflowVersion = '!>=21.10.6' - version = '3.0.0.1' + version = '3.0.0.2' } // Function to ensure that resource requirements don't go beyond