From 4d562f8d09a0b4972f1bb3a2694640ebfe03a835 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Mon, 5 Aug 2024 16:42:29 +0200 Subject: [PATCH 1/9] Add new 5'-RACE protocol support --- subworkflows/local/sequence_assembly.nf | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index d001b124..03d5f496 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -156,6 +156,34 @@ workflow SEQUENCE_ASSEMBLY { if (params.internal_cregion_sequences) { error "Please do not set '--internal_cregion_sequences' when using the 'dt_5p_race' library generation method without UMIs." } + } else if (params.library_generation_method == 'specific_5p_race_umi') { + if (params.vprimers) { + error "The specific 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." + } else if (params.race_linker) { + ch_vprimers_fasta = Channel.fromPath(params.race_linker, checkIfExists: true) + } else if (params.maskprimers_align) { + ch_vprimers_fasta = Channel.of([]) + } else { + error "The specific 5'-RACE UMI library generation method requires a linker or Template Switch Oligo sequence, please provide it with the option '--race_linker'." + } + if (params.cprimers) { + ch_cprimers_fasta = Channel.fromPath(params.cprimers, checkIfExists: true) + } else { + error "The specific 5'-RACE UMI library generation method requires the C-region primer sequences, please provide a fasta file with the '--cprimers' option." + } + if (params.umi_linker) { + ch_umilinker_fasta = Channel.fromPath(params.umi_linker, checkIfExists: true) + } else { + error "The specific 5'-RACE UMI library generation method requires the UMI + linker sequences, please provide a fasta file with the '--umi_linker' option." 
+ } + if (params.umi_length < 2) { + error "The specific 5'-RACE UMI 'specific_5p_race_umi' library generation method requires specifying the '--umi_length' to a value greater than 1." + } + if (params.internal_cregion_sequences) { + ch_internal_cregion = Channel.fromPath(params.internal_cregion_sequences, checkIfExists: true) + } else { + ch_internal_cregion = Channel.of([]) + } } else { error "The provided library generation method is not supported. Please check the docs for `--library_generation_method`." } From fc9e65d319ef2311c81b678a1b1174412831cc9f Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Mon, 5 Aug 2024 17:46:18 +0200 Subject: [PATCH 2/9] New presto_maskprimers_align_trim module and adjust presto_umi subworkflow for this new 5'-RACE support --- .../presto/presto_maskprimers_align_trim.nf | 40 +++++++++++++++++++ subworkflows/local/presto_umi.nf | 27 ++++++++++++- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 modules/local/presto/presto_maskprimers_align_trim.nf diff --git a/modules/local/presto/presto_maskprimers_align_trim.nf b/modules/local/presto/presto_maskprimers_align_trim.nf new file mode 100644 index 00000000..848959cf --- /dev/null +++ b/modules/local/presto/presto_maskprimers_align_trim.nf @@ -0,0 +1,40 @@ +process PRESTO_MASKPRIMERS_ALIGN_TRIM { + tag "$meta.id" + label "process_high" + label 'immcantation' + + conda "bioconda::presto=0.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(R1) + path(umi_linker) + + output: + tuple val(meta), path("*_R1_primers-pass.fastq") , emit: reads + path "*_command_log_R1.txt", emit: logs + path "*_R1.log" + path "*.tab", emit: log_tab + path "versions.yml" , emit: versions + + script: + def args = task.ext.args?: '' + def args2 = task.ext.args2?: '' + """ + MaskPrimers.py align --nproc ${task.cpus} \\ + -s $R1 \\ + -p ${umi_linker} \\ + --mode trim \\ + $args \\ + --outname ${meta.id}_R1 \\ + --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt + ParseLog.py -l ${meta.id}_R1.log $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( MaskPrimers.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 17caa68e..d3016628 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -10,6 +10,7 @@ include { FASTP } from '../../modules/n include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modules/local/presto/presto_filterseq' include { PRESTO_MASKPRIMERS as PRESTO_MASKPRIMERS_UMI } from '../../modules/local/presto/presto_maskprimers' include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_PRIMERS } from '../../modules/local/presto/presto_maskprimers_align' +include { PRESTO_MASKPRIMERS_ALIGN_TRIM as PRESTO_ALIGN_TRIM } from '../../modules/local/presto/presto_maskprimers_align_trim' include { PRESTO_MASKPRIMERS_EXTRACT } from '../../modules/local/presto/presto_maskprimers_extract' include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_CREGION } from '../../modules/local/presto/presto_maskprimers_align' include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq' @@ -36,6 +37,7 @@ workflow PRESTO_UMI { ch_reads // channel: [ val(meta), [ reads ] ] ch_cprimers // 
channel: [ cprimers.fasta ] ch_vprimers // channel: [ vprimers.fasta ] + ch_umilinker // channel: [ umi_linker.fasta ] ch_adapter_fasta // channel: [ adapters.fasta ] ch_internal_cregion // channel: [ internal_cregions.fasta ] ch_igblast @@ -135,7 +137,30 @@ workflow PRESTO_UMI { ch_for_clustersets = PRESTO_PAIRSEQ_ALIGN.out.reads ch_pairseq_logs = PRESTO_PAIRSEQ_ALIGN.out.logs - } else { + } if (params.library_generation_method == 'specific_5p_race_umi') { + // trim any sequence in R1 that is before UMI pattern-race linker sequence + PRESTO_ALIGN_TRIM( + ch_reads_R1, + ch_umilinker.collect() + ) + PRESTO_MASKPRIMERS_UMI ( + PRESTO_ALIGN_TRIM.out.reads, + ch_cprimers.collect(), + ch_vprimers.collect() + ) + + ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions) + ch_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs + + // Pre-consensus pair + PRESTO_PAIRSEQ_UMI ( + PRESTO_MASKPRIMERS_UMI.out.reads + ) + ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions) + ch_for_clustersets = PRESTO_PAIRSEQ_UMI.out.reads + ch_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs + + } else { PRESTO_MASKPRIMERS_UMI ( PRESTO_FILTERSEQ_UMI.out.reads, From 4fa3d18e20bc2e4898b37c01a4aed37a88190fc9 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Tue, 6 Aug 2024 11:30:10 +0200 Subject: [PATCH 3/9] presto_maskprimers_align_trim module conf and update nextflow_schema JSON --- conf/modules.config | 10 +++++++ nextflow_schema.json | 68 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 66 insertions(+), 12 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 3dc63fa9..bca500ef 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -138,6 +138,16 @@ process { ] } + withName: PRESTO_MASKPRIMERS_ALIGN_TRIM { + publishDir = [ + path: { "${params.outdir}/presto/trim_upstream_umi_linker/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + ext.args = '--skiprc --pf UMILINK' + ext.args2 = '-f ID UMILINK ERROR' + } + withName: PRESTO_MASKPRIMERS_ALIGN { publishDir = [ path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" }, diff --git a/nextflow_schema.json b/nextflow_schema.json index 1c32a276..e9a6002c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": [ + "input", + "outdir" + ], "properties": { "input": { "type": "string", @@ -27,7 +30,10 @@ "type": "string", "default": "fastq", "description": "Specify the processing mode for the pipeline. Available options are \"fastq\" and \"assembled\".", - "enum": ["fastq", "assembled"], + "enum": [ + "fastq", + "assembled" + ], "fa_icon": "fas fa-terminal" }, "outdir": { @@ -61,14 +67,26 @@ "type": "string", "fa_icon": "fas fa-flask", "description": "Protocol used for the V(D)J amplicon sequencing library generation.", - "enum": ["specific_pcr_umi", "specific_pcr", "dt_5p_race", "dt_5p_race_umi", "sc_10x_genomics"], + "enum": [ + "specific_pcr_umi", + "specific_pcr", + "dt_5p_race", + "dt_5p_race_umi", + "specific_5p_race_umi", + "sc_10x_genomics" + ], "help_text": "Available protocols are:\n- `specific_pcr_umi`: RT-PCR using transcript-specific primers containing UMIs.\n- `specific_pcr`: RT-PCR using transcript-specific primers.\n- `dt_5p_race_umi`: 5\u2019-RACE PCR using oligo-dT primers and template switch primers containing UMI.\n- `dt_5p_race`: 5\u2019-RACE PCR (i.e. RT is followed by a template switch (TS) step) using oligo-dT primers.\n- `sc_10x_genomics`:10x genomics library preparation protocol for scVDJ sequencing." }, "race_linker": { "type": "string", "description": "Path to fasta file containing the linker sequence, if no V-region primers were used but a linker sequence is present (e.g. 
5' RACE SMARTer TAKARA protocol).", "fa_icon": "fas fa-dna" - } + }, + "umi_linker": { + "type": "string", + "description": "Path to fasta file containing umi-linker motifs, if no V-region primer has been used but a linker sequence is present with a residual sequence upstream of the UMI.", + "fa_icon": "fas fa-dna" + } }, "fa_icon": "fas fa-flask" }, @@ -105,7 +123,10 @@ "default": "R1", "fa_icon": "fas fa-dna", "description": "Indicate if C region primers are in the R1 or R2 reads.", - "enum": ["R1", "R2"] + "enum": [ + "R1", + "R2" + ] }, "primer_revpr": { "type": "boolean", @@ -126,7 +147,10 @@ "default": "R1", "description": "Indicate if UMI indices are recorded in the R1 (default) or R1 fastq file.", "help_text": "The pipeline requires UMI barcodes for identifying unique transcripts. These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter to R1 or R2, respectively.", - "enum": ["R1", "R2"], + "enum": [ + "R1", + "R2" + ], "fa_icon": "fas fa-barcode" }, "umi_length": { @@ -228,7 +252,12 @@ "type": "string", "default": "cut", "description": "Masking mode for the pRESTO MaskPrimer step. 
Available: cut, mask, trim, tag.", - "enum": ["cut", "mask", "tag", "trim"], + "enum": [ + "cut", + "mask", + "tag", + "trim" + ], "help_text": "The primer masking modes will perform the following actions:\n\n* `cut`: remove both the primer region and the preceding sequence.\n* `mask`: replace the primer region with Ns and remove the preceding sequence.\n* `trim`: remove the region preceding the primer, but leave the primer region intact.\n* `tag`: leave the input sequence unmodified.", "fa_icon": "fas fa-mask" }, @@ -400,14 +429,19 @@ "oneOf": [ { "type": "string", - "enum": ["auto"] + "enum": [ + "auto" + ] }, { "type": "number", "minimum": 0 } ], - "type": ["string", "number"], + "type": [ + "string", + "number" + ], "default": "auto", "fa_icon": "fab fa-pagelines", "description": "Set the clustering threshold Hamming distance value. Default: 'auto'" @@ -433,7 +467,10 @@ "type": "string", "default": "raxml", "description": "Lineage tree software to use to build trees within Dowser. If you change the default, also set the `lineage_tree_exec` parameter.", - "enum": ["raxml", "igphyml"], + "enum": [ + "raxml", + "igphyml" + ], "fa_icon": "fas fa-pagelines" }, "lineage_tree_exec": { @@ -654,7 +691,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email_on_fail": { @@ -800,4 +844,4 @@ "$ref": "#/definitions/generic_options" } ] -} +} \ No newline at end of file From b09fabd7ba330444ecfe973ac39200baa35a2ee8 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Tue, 6 Aug 2024 13:25:51 +0200 Subject: [PATCH 4/9] pipeline work with the new protocol --- conf/modules.config | 2 +- .../presto/presto_maskprimers_align_trim.nf | 4 ++-- subworkflows/local/presto_umi.nf | 17 ++++++++++++++--- subworkflows/local/sequence_assembly.nf | 1 + 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index bca500ef..d17d293b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -145,7 +145,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] ext.args = '--skiprc --pf UMILINK' - ext.args2 = '-f ID UMILINK ERROR' + ext.args2 = '-f ID PRIMER ERROR' } withName: PRESTO_MASKPRIMERS_ALIGN { diff --git a/modules/local/presto/presto_maskprimers_align_trim.nf b/modules/local/presto/presto_maskprimers_align_trim.nf index 848959cf..041e46e4 100644 --- a/modules/local/presto/presto_maskprimers_align_trim.nf +++ b/modules/local/presto/presto_maskprimers_align_trim.nf @@ -13,7 +13,7 @@ process PRESTO_MASKPRIMERS_ALIGN_TRIM { path(umi_linker) output: - tuple val(meta), path("*_R1_primers-pass.fastq") , emit: reads + tuple val(meta), path("*_trim_R1_primers-pass.fastq") , emit: reads path "*_command_log_R1.txt", emit: logs path "*_R1.log" path "*.tab", emit: log_tab @@ -28,7 +28,7 @@ process PRESTO_MASKPRIMERS_ALIGN_TRIM { -p ${umi_linker} \\ --mode trim \\ $args \\ - --outname ${meta.id}_R1 \\ + --outname ${meta.id}_trim_R1 \\ --log ${meta.id}_R1.log > ${meta.id}_command_log_R1.txt ParseLog.py -l ${meta.id}_R1.log $args2 diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index d3016628..0708ceab 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -137,14 +137,25 @@ workflow PRESTO_UMI { ch_for_clustersets = PRESTO_PAIRSEQ_ALIGN.out.reads ch_pairseq_logs = PRESTO_PAIRSEQ_ALIGN.out.logs - } if (params.library_generation_method == 'specific_5p_race_umi') { + } else if (params.library_generation_method == 'specific_5p_race_umi') { + + ch_reads_R1 = PRESTO_FILTERSEQ_UMI.out.reads + .map{ reads -> [reads[0], reads[1]] }.dump(tag: 'ch_reads_R1') + // trim any sequence in R1 that is before UMI pattern-race linker sequence PRESTO_ALIGN_TRIM( ch_reads_R1, ch_umilinker.collect() ) + + // Merge again R1 and R2 by sample ID. 
+ ch_maskprimers_trim_reads_R1 = PRESTO_ALIGN_TRIM.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_maskprimers_trim_reads_R1') + ch_filterseq_umi_reads_R2 = PRESTO_FILTERSEQ_UMI.out.reads.map{ reads -> [reads[0].id, reads[0], reads[2]]}.dump(tag: 'ch_filterseq_umi_reads_R2') + ch_reads_for_maskprimers_umi = ch_maskprimers_trim_reads_R1.join(ch_filterseq_umi_reads_R2) + .map{ it -> [it[1], it[2], it[4]] }.dump(tag: 'ch_reads_for_maskprimers_umi') + PRESTO_MASKPRIMERS_UMI ( - PRESTO_ALIGN_TRIM.out.reads, + ch_reads_for_maskprimers_umi, ch_cprimers.collect(), ch_vprimers.collect() ) @@ -160,7 +171,7 @@ workflow PRESTO_UMI { ch_for_clustersets = PRESTO_PAIRSEQ_UMI.out.reads ch_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs - } else { + } else { PRESTO_MASKPRIMERS_UMI ( PRESTO_FILTERSEQ_UMI.out.reads, diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index 03d5f496..366fd2d9 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -238,6 +238,7 @@ workflow SEQUENCE_ASSEMBLY { ch_reads, ch_cprimers_fasta, ch_vprimers_fasta, + ch_umilinker_fasta, ch_adapter_fasta, ch_internal_cregion, ch_igblast.collect() From 441dcffe3ec144ffee858c09d4f557b4a442d146 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Wed, 7 Aug 2024 13:14:27 +0200 Subject: [PATCH 5/9] pipeline work with new option --assemblepairs_join but with --failed parameter in nexflow user custom conf files for PRESTO_ASSEMBLEPAIRS_UMI process via args. 
--- conf/modules.config | 10 +++++ modules/local/presto/presto_assemblepairs.nf | 1 + .../local/presto/presto_assemblepairs_join.nf | 37 +++++++++++++++++++ nextflow_schema.json | 5 +++ subworkflows/local/presto_umi.nf | 26 +++++++++++++ 5 files changed, 79 insertions(+) create mode 100644 modules/local/presto/presto_assemblepairs_join.nf diff --git a/conf/modules.config b/conf/modules.config index d17d293b..e1db1ab1 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -259,6 +259,16 @@ process { ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS' ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' } + + withName: PRESTO_ASSEMBLEPAIRS_JOIN { + publishDir = [ + path: { "${params.outdir}/presto/08-assemble-pairs-join/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS' + ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' + } withName: PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { publishDir = [ diff --git a/modules/local/presto/presto_assemblepairs.nf b/modules/local/presto/presto_assemblepairs.nf index c6ba4287..3b46af3d 100644 --- a/modules/local/presto/presto_assemblepairs.nf +++ b/modules/local/presto/presto_assemblepairs.nf @@ -13,6 +13,7 @@ process PRESTO_ASSEMBLEPAIRS { output: tuple val(meta), path("*_assemble-pass.fastq"), emit: reads + tuple val(meta), path("*_assemble-fail.fastq"), emit: reads_fail path("*_command_log.txt"), emit: logs path("*.log") path("*_table.tab") diff --git a/modules/local/presto/presto_assemblepairs_join.nf b/modules/local/presto/presto_assemblepairs_join.nf new file mode 100644 index 00000000..a0c4e2b3 --- /dev/null +++ b/modules/local/presto/presto_assemblepairs_join.nf @@ -0,0 +1,37 @@ +process PRESTO_ASSEMBLEPAIRS_JOIN { + tag "$meta.id" + 
label 'process_long_parallelized' + label 'immcantation' + + conda "bioconda::presto=0.7.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' : + 'biocontainers/presto:0.7.1--pyhdfd78af_0' }" + + input: + tuple val(meta), path(R1), path(R2), path(reads_pass) + + output: + tuple val(meta), path("*_assemblejoin-pass.fastq"), emit: reads + path("*_command_log.txt"), emit: logs + path("*.log") + path("*_table.tab") + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + """ + AssemblePairs.py join -1 $R1 -2 $R2 --nproc ${task.cpus} \\ + $args \\ + --outname ${meta.id}_join --log ${meta.id}_join.log > ${meta.id}_join_command_log.txt + ParseLog.py -l ${meta.id}_join.log $args2 + cp ${meta.id}_assemble-pass.fastq ${meta.id}_assemblejoin-pass.fastq + cat ${meta.id}_join_assemble-pass.fastq >> ${meta.id}_assemblejoin-pass.fastq + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + presto: \$( AssemblePairs.py --version | awk -F' ' '{print \$2}' ) + END_VERSIONS + """ +} diff --git a/nextflow_schema.json b/nextflow_schema.json index e9a6002c..d8d24ca0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -313,6 +313,11 @@ "fa_icon": "fas fa-align-center", "description": "Use AssemblePairs sequential instead of AssemblePairs align when assembling read pairs." }, + "assemblepairs_join": { + "type": "boolean", + "fa_icon": "fas fa-align-center", + "description": "Use AssemblePairs join after AssemblePairs align to rescue non-overlapping reads by concatening them by their ends." 
+ }, "align_cregion": { "type": "boolean", "fa_icon": "fas fa-align-center", diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 0708ceab..9fd6d17d 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -21,6 +21,7 @@ include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_UMI} from '../../mo include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_ALIGN } from '../../modules/local/presto/presto_buildconsensus' include { PRESTO_POSTCONSENSUS_PAIRSEQ as PRESTO_POSTCONSENSUS_PAIRSEQ_UMI } from '../../modules/local/presto/presto_postconsensus_pairseq' include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_UMI } from '../../modules/local/presto/presto_assemblepairs' +include { PRESTO_ASSEMBLEPAIRS_JOIN as PRESTO_ASSEMBLEPAIRS_JOIN_UMI } from '../../modules/local/presto/presto_assemblepairs_join' include { PRESTO_ASSEMBLEPAIRS_SEQUENTIAL } from '../../modules/local/presto/presto_assemblepairs_sequential' include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_COLLAPSE_UMI } from '../../modules/local/presto/presto_parseheaders' include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_CREGION } from '../../modules/local/presto/presto_parseheaders' @@ -244,6 +245,31 @@ workflow PRESTO_UMI { ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.versions) ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.reads ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.logs + } else if (params.assemblepairs_join) { + // Assemble read pairs align and get failed reads + PRESTO_ASSEMBLEPAIRS_UMI ( + PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads + ) + + + // Merge R1 failed, R2 failed and assemblepairs pass reads by sample ID. 
+ ch_assemblepairs_fail_reads = PRESTO_ASSEMBLEPAIRS_UMI.out.reads_fail.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_assemblepairs_fail_reads') + + ch_assemblepairs_pass_reads = PRESTO_ASSEMBLEPAIRS_UMI.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_assemblepairs_pass_reads') + + ch_reads_for_assemblepairs_join_umi = ch_assemblepairs_fail_reads.join(ch_assemblepairs_pass_reads) + .map{ it -> [it[1], it[2][0], it[2][1], it[4]] }.dump(tag: 'ch_reads_for_assemblepairs_join_umi') + + // rescue no overlapping reads + PRESTO_ASSEMBLEPAIRS_JOIN_UMI ( + ch_reads_for_assemblepairs_join_umi + ) + + ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.versions) + ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.reads + ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.logs + + } else { // Assemble read pairs align PRESTO_ASSEMBLEPAIRS_UMI ( From cae541622079d669e0445ea8df5c9a02cda50f41 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Wed, 7 Aug 2024 16:08:45 +0200 Subject: [PATCH 6/9] default value for --assemblepairs_join option, PRESTO_ASSEMBLEPAIRS_UMI always output failed reads --- conf/modules.config | 4 ++-- modules/local/presto/presto_assemblepairs.nf | 2 +- nextflow.config | 1 + nextflow_schema.json | 1 + 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index e1db1ab1..9002520a 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -256,7 +256,7 @@ process { mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS' + ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS --failed' ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' } @@ -267,7 +267,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS' - ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE' + ext.args2 = '-f ID LENGTH' } withName: PRESTO_ASSEMBLEPAIRS_SEQUENTIAL { diff --git a/modules/local/presto/presto_assemblepairs.nf b/modules/local/presto/presto_assemblepairs.nf index 3b46af3d..bc7d63f8 100644 --- a/modules/local/presto/presto_assemblepairs.nf +++ b/modules/local/presto/presto_assemblepairs.nf @@ -13,7 +13,7 @@ process PRESTO_ASSEMBLEPAIRS { output: tuple val(meta), path("*_assemble-pass.fastq"), emit: reads - tuple val(meta), path("*_assemble-fail.fastq"), emit: reads_fail + tuple val(meta), path("*_assemble-fail.fastq"),emit: reads_fail, optional: true path("*_command_log.txt"), emit: logs path("*.log") path("*_table.tab") diff --git a/nextflow.config b/nextflow.config index e51ff966..0026bd9b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -68,6 +68,7 @@ params { // Assemble pairs assemblepairs_sequential = false + assemblepairs_join = false // internal cregion align_cregion = false diff --git a/nextflow_schema.json b/nextflow_schema.json index d8d24ca0..e53b2ce4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -316,6 +316,7 @@ "assemblepairs_join": { "type": "boolean", "fa_icon": "fas fa-align-center", + "default": false, "description": "Use AssemblePairs join after AssemblePairs align to rescue non-overlapping reads by concatening them by their ends." 
}, "align_cregion": { From 2f0399c9bd71c75ccac8297bde13e7d94ee49574 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Wed, 7 Aug 2024 18:19:36 +0200 Subject: [PATCH 7/9] qc graph not include join reads --- subworkflows/local/presto_umi.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/presto_umi.nf b/subworkflows/local/presto_umi.nf index 9fd6d17d..a72a5635 100644 --- a/subworkflows/local/presto_umi.nf +++ b/subworkflows/local/presto_umi.nf @@ -267,9 +267,11 @@ workflow PRESTO_UMI { ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.versions) ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.reads - ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.logs - + + // not include number of rescue reads + ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs + } else { // Assemble read pairs align PRESTO_ASSEMBLEPAIRS_UMI ( From ff3b048d8ea206bcaf8f9fd3c8dc8449603a7388 Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Thu, 8 Aug 2024 16:41:14 +0200 Subject: [PATCH 8/9] fix test profile --- subworkflows/local/sequence_assembly.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/subworkflows/local/sequence_assembly.nf b/subworkflows/local/sequence_assembly.nf index 366fd2d9..586a41a1 100644 --- a/subworkflows/local/sequence_assembly.nf +++ b/subworkflows/local/sequence_assembly.nf @@ -67,6 +67,7 @@ workflow SEQUENCE_ASSEMBLY { // Validate library generation method parameter if (params.library_generation_method == 'specific_pcr_umi'){ + ch_umilinker_fasta = Channel.of([]) if (params.vprimers) { ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) } else { @@ -89,6 +90,7 @@ workflow SEQUENCE_ASSEMBLY { ch_internal_cregion = Channel.of([]) } } else if (params.library_generation_method == 'specific_pcr') { + ch_umilinker_fasta = Channel.of([]) if (params.vprimers) { ch_vprimers_fasta = Channel.fromPath(params.vprimers, checkIfExists: true) } else { @@ -111,6 +113,7 @@ 
workflow SEQUENCE_ASSEMBLY { error "Please do not set '--internal_cregion_sequences' when using the 'specific_pcr' library generation method without UMIs." } } else if (params.library_generation_method == 'dt_5p_race_umi') { + ch_umilinker_fasta = Channel.of([]) if (params.vprimers) { error "The oligo-dT 5'-RACE UMI library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." } else if (params.race_linker) { @@ -134,6 +137,7 @@ workflow SEQUENCE_ASSEMBLY { ch_internal_cregion = Channel.of([]) } } else if (params.library_generation_method == 'dt_5p_race') { + ch_umilinker_fasta = Channel.of([]) if (params.vprimers) { error "The oligo-dT 5'-RACE library generation method does not accept V-region primers, please provide a linker with '--race_linker' instead or select another library method option." } else if (params.race_linker) { From 3c02fd015397da67270f3d0aca746821a9eb1a6d Mon Sep 17 00:00:00 2001 From: Justine Pollet Date: Thu, 8 Aug 2024 17:23:03 +0200 Subject: [PATCH 9/9] update doc/usage.md --- docs/usage.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/usage.md b/docs/usage.md index c3844547..7144a405 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -437,6 +437,27 @@ The UMI barcodes are typically read from an index file but sometimes can be prov - No UMIs in R1 or R2 reads: if no UMIs are present in the samples, specify `--umi_length 0` to use the sans-UMI subworkflow. +### 5’-RACE where R1 does not start directly with the UMI + +This sequencing type requires setting `--library_generation_method specific_5p_race_umi`. + +A fasta file containing the UMI + RACE linker sequence pattern must be provided with `--umi_linker`; it is used to locate and trim the sequence upstream of the UMI. 
+ +```bash +nextflow run nf-core/airrflow -profile docker \ +--input samplesheet.tsv \ +--library_generation_method specific_5p_race_umi \ +--cprimers Cprimers.fasta \ +--race_linker linker.fasta \ +--umi_linker umi_pattern.fasta \ +--umi_position R1 \ +--umi_length 18 \ +--cprimer_start 0 \ +--cprimer_position R2 \ +--outdir ./results +``` + + ## Supported single cell library generation methods (protocols) When processing single cell sequencing data departing from raw `fastq` reads, currently only a `--library_generation_method` to support 10xGenomics data is available.