Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New bulk 5'-RACE supported protocol, non-overlapping reads rescue #343

Open
wants to merge 11 commits into
base: dev
Choose a base branch
from
22 changes: 21 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,16 @@ process {
]
}

// Settings for the step that trims R1 upstream of the UMI/linker pattern:
// results are published per sample under presto/trim_upstream_umi_linker.
withName: PRESTO_MASKPRIMERS_ALIGN_TRIM {
    publishDir = [
        path: { "${params.outdir}/presto/trim_upstream_umi_linker/${meta.id}" },
        mode: params.publish_dir_mode,
        // versions.yml is never published; every other file keeps its name
        saveAs: { fname -> fname == 'versions.yml' ? null : fname }
    ]
    // ext.args  -> extra options inserted into the MaskPrimers.py align command
    // ext.args2 -> options handed to ParseLog.py (log fields to tabulate)
    ext.args  = '--skiprc --pf UMILINK'
    ext.args2 = '-f ID PRIMER ERROR'
}

withName: PRESTO_MASKPRIMERS_ALIGN {
publishDir = [
path: { "${params.outdir}/presto/02-maskprimers/${meta.id}" },
Expand Down Expand Up @@ -246,9 +256,19 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS'
ext.args = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS --failed'
ext.args2 = '-f ID BARCODE SEQCOUNT PRIMER PRCOUNT PRCONS PRFREQ CONSCOUNT LENGTH OVERLAP ERROR PVALUE'
}

// Settings for the AssemblePairs join rescue step:
// results are published per sample under presto/08-assemble-pairs-join.
withName: PRESTO_ASSEMBLEPAIRS_JOIN {
    publishDir = [
        path: { "${params.outdir}/presto/08-assemble-pairs-join/${meta.id}" },
        mode: params.publish_dir_mode,
        // versions.yml is never published; every other file keeps its name
        saveAs: { fname -> fname == 'versions.yml' ? null : fname }
    ]
    // ext.args  -> extra options inserted into the AssemblePairs.py join command
    // ext.args2 -> options handed to ParseLog.py (log fields to tabulate)
    ext.args  = '--coord presto --rc tail --1f CONSCOUNT PRCONS --2f CONSCOUNT PRCONS'
    ext.args2 = '-f ID LENGTH'
}

withName: PRESTO_ASSEMBLEPAIRS_SEQUENTIAL {
publishDir = [
Expand Down
21 changes: 21 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -444,6 +444,27 @@ The UMI barcodes are typically read from an index file but sometimes can be prov

- No UMIs in R1 or R2 reads: if no UMIs are present in the samples, specify `--umi_length 0` to use the sans-UMI subworkflow.

### 5’-RACE where R1 does not start directly with the UMI

This sequencing type requires setting `--library_generation_method specific_5p_race_umi`.

A fasta file containing the UMI + RACE linker sequence pattern must be provided with `--umi_linker`. It is used to locate and trim the sequence upstream of the UMI.

```bash
nextflow run nf-core/airrflow -profile docker \
--input samplesheet.tsv \
--library_generation_method specific_5p_race_umi \
--cprimers Cprimers.fasta \
--race_linker linker.fasta \
--umi_linker umi_pattern.fasta \
--umi_position R1 \
--umi_length 18 \
--cprimer_start 0 \
--cprimer_position R2 \
--outdir ./results
```


## Supported single cell library generation methods (protocols)

When processing single cell sequencing data departing from raw `fastq` reads, currently only a `--library_generation_method` to support 10xGenomics data is available.
Expand Down
1 change: 1 addition & 0 deletions modules/local/presto/presto_assemblepairs.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ process PRESTO_ASSEMBLEPAIRS {

output:
tuple val(meta), path("*_assemble-pass.fastq"), emit: reads
tuple val(meta), path("*_assemble-fail.fastq"),emit: reads_fail, optional: true
path("*_command_log.txt"), emit: logs
path("*.log")
path("*_table.tab")
Expand Down
37 changes: 37 additions & 0 deletions modules/local/presto/presto_assemblepairs_join.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Rescue read pairs with `AssemblePairs.py join` and merge the joined reads
// with an already-assembled fastq into a single output file.
//
// Inputs
//   meta       : sample metadata map; `meta.id` is used to name all outputs
//   R1, R2     : the two read files given to `AssemblePairs.py join -1 / -2`
//   reads_pass : fastq written first into the merged output
//
// Outputs
//   reads    : `<id>_assemblejoin-pass.fastq` = reads_pass followed by the joined reads
//   logs     : captured stdout of the AssemblePairs command
//   *.log / *_table.tab : pRESTO log and the table ParseLog.py derives from it
//   versions : versions.yml with the pRESTO version
process PRESTO_ASSEMBLEPAIRS_JOIN {
    tag "$meta.id"
    label 'process_long_parallelized'
    label 'immcantation'

    conda "bioconda::presto=0.7.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' :
        'biocontainers/presto:0.7.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(R1), path(R2), path(reads_pass)

    output:
    tuple val(meta), path("*_assemblejoin-pass.fastq"), emit: reads
    path("*_command_log.txt"), emit: logs
    path("*.log")
    path("*_table.tab")
    path "versions.yml" , emit: versions

    script:
    def args = task.ext.args ?: ''
    def args2 = task.ext.args2 ?: ''
    """
    AssemblePairs.py join -1 $R1 -2 $R2 --nproc ${task.cpus} \\
        $args \\
        --outname ${meta.id}_join --log ${meta.id}_join.log > ${meta.id}_join_command_log.txt
    ParseLog.py -l ${meta.id}_join.log $args2

    # Use the staged input path rather than a hard-coded "<id>_assemble-pass.fastq",
    # so the merge does not depend on how the upstream process names its output.
    cat $reads_pass ${meta.id}_join_assemble-pass.fastq > ${meta.id}_assemblejoin-pass.fastq

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        presto: \$( AssemblePairs.py --version | awk -F' ' '{print \$2}' )
    END_VERSIONS
    """
}
40 changes: 40 additions & 0 deletions modules/local/presto/presto_maskprimers_align_trim.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Run `MaskPrimers.py align --mode trim` on the R1 reads using the sequences
// in `umi_linker` as the primer file, then tabulate the pRESTO log with ParseLog.py.
//
// Inputs
//   meta       : sample metadata map; `meta.id` is used to name all outputs
//   R1         : R1 fastq file
//   umi_linker : fasta file passed to MaskPrimers via `-p`
//
// Outputs
//   reads    : `<id>_trim_R1_primers-pass.fastq`
//   logs     : captured stdout of the MaskPrimers command
//   log_tab  : table(s) produced by ParseLog.py
//   versions : versions.yml with the pRESTO version
process PRESTO_MASKPRIMERS_ALIGN_TRIM {
    tag "$meta.id"
    label "process_high"
    label 'immcantation'

    conda "bioconda::presto=0.7.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/presto:0.7.1--pyhdfd78af_0' :
        'biocontainers/presto:0.7.1--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(R1)
    path(umi_linker)

    output:
    tuple val(meta), path("*_trim_R1_primers-pass.fastq") , emit: reads
    path "*_command_log_R1.txt", emit: logs
    path "*_R1.log"
    path "*.tab", emit: log_tab
    path "versions.yml" , emit: versions

    script:
    // Optional extra command-line options, supplied through the module config
    def align_args = task.ext.args ?: ''
    def parselog_args = task.ext.args2 ?: ''
    """
    MaskPrimers.py align --nproc ${task.cpus} \\
        -s $R1 -p ${umi_linker} \\
        --mode trim \\
        $align_args \\
        --outname ${meta.id}_trim_R1 --log ${meta.id}_R1.log \\
        > ${meta.id}_command_log_R1.txt
    ParseLog.py -l ${meta.id}_R1.log $parselog_args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        presto: \$( MaskPrimers.py --version | awk -F' ' '{print \$2}' )
    END_VERSIONS
    """
}
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ params {

// Assemble pairs
assemblepairs_sequential = false
assemblepairs_join = false

// internal cregion
align_cregion = false
Expand Down
63 changes: 53 additions & 10 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Define where the pipeline should find input data and save output data.",
"required": ["input", "outdir"],
"required": [
"input",
"outdir"
],
"properties": {
"input": {
"type": "string",
Expand All @@ -27,7 +30,10 @@
"type": "string",
"default": "fastq",
"description": "Specify the processing mode for the pipeline. Available options are \"fastq\" and \"assembled\".",
"enum": ["fastq", "assembled"],
"enum": [
"fastq",
"assembled"
],
"fa_icon": "fas fa-terminal"
},
"outdir": {
Expand Down Expand Up @@ -75,7 +81,12 @@
"type": "string",
"description": "Path to fasta file containing the linker sequence, if no V-region primers were used but a linker sequence is present (e.g. 5' RACE SMARTer TAKARA protocol).",
"fa_icon": "fas fa-dna"
}
},
"umi_linker": {
"type": "string",
"description": "Path to fasta file containing umi-linker motifs, if no V-region primer has been used but a linker sequence is present with a residual sequence upstream of the UMI.",
"fa_icon": "fas fa-dna"
}
},
"fa_icon": "fas fa-flask"
},
Expand Down Expand Up @@ -112,7 +123,10 @@
"default": "R1",
"fa_icon": "fas fa-dna",
"description": "Indicate if C region primers are in the R1 or R2 reads.",
"enum": ["R1", "R2"]
"enum": [
"R1",
"R2"
]
},
"primer_revpr": {
"type": "boolean",
Expand All @@ -133,7 +147,10 @@
"default": "R1",
"description": "Indicate if UMI indices are recorded in the R1 (default) or R2 fastq file.",
"help_text": "The pipeline requires UMI barcodes for identifying unique transcripts. These barcodes are typically read from an index file but sometimes can be provided merged with the start of the R1 or R2 reads. If provided in an additional index file, set the `--index_file` parameter, if provided merged with the R1 or R2 reads, set the `--umi_position` parameter to R1 or R2, respectively.",
"enum": ["R1", "R2"],
"enum": [
"R1",
"R2"
],
"fa_icon": "fas fa-barcode"
},
"umi_length": {
Expand Down Expand Up @@ -235,7 +252,12 @@
"type": "string",
"default": "cut",
"description": "Masking mode for the pRESTO MaskPrimer step. Available: cut, mask, trim, tag.",
"enum": ["cut", "mask", "tag", "trim"],
"enum": [
"cut",
"mask",
"tag",
"trim"
],
"help_text": "The primer masking modes will perform the following actions:\n\n* `cut`: remove both the primer region and the preceding sequence.\n* `mask`: replace the primer region with Ns and remove the preceding sequence.\n* `trim`: remove the region preceding the primer, but leave the primer region intact.\n* `tag`: leave the input sequence unmodified.",
"fa_icon": "fas fa-mask"
},
Expand Down Expand Up @@ -291,6 +313,12 @@
"fa_icon": "fas fa-align-center",
"description": "Use AssemblePairs sequential instead of AssemblePairs align when assembling read pairs."
},
"assemblepairs_join": {
"type": "boolean",
"fa_icon": "fas fa-align-center",
"default": false,
"description": "Use AssemblePairs join after AssemblePairs align to rescue non-overlapping reads by concatenating them end to end."
},
"align_cregion": {
"type": "boolean",
"fa_icon": "fas fa-align-center",
Expand Down Expand Up @@ -410,14 +438,19 @@
"oneOf": [
{
"type": "string",
"enum": ["auto"]
"enum": [
"auto"
]
},
{
"type": "number",
"minimum": 0
}
],
"type": ["string", "number"],
"type": [
"string",
"number"
],
"default": "auto",
"fa_icon": "fab fa-pagelines",
"description": "Set the clustering threshold Hamming distance value. Default: 'auto'"
Expand All @@ -443,7 +476,10 @@
"type": "string",
"default": "raxml",
"description": "Lineage tree software to use to build trees within Dowser. If you change the default, also set the `lineage_tree_exec` parameter.",
"enum": ["raxml", "igphyml"],
"enum": [
"raxml",
"igphyml"
],
"fa_icon": "fas fa-pagelines"
},
"lineage_tree_exec": {
Expand Down Expand Up @@ -694,7 +730,14 @@
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"enum": [
"symlink",
"rellink",
"link",
"copy",
"copyNoFollow",
"move"
],
"hidden": true
},
"email_on_fail": {
Expand Down
64 changes: 64 additions & 0 deletions subworkflows/local/presto_umi.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ include { FASTP } from '../../modules/n
include { PRESTO_FILTERSEQ as PRESTO_FILTERSEQ_UMI } from '../../modules/local/presto/presto_filterseq'
include { PRESTO_MASKPRIMERS as PRESTO_MASKPRIMERS_UMI } from '../../modules/local/presto/presto_maskprimers'
include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_PRIMERS } from '../../modules/local/presto/presto_maskprimers_align'
include { PRESTO_MASKPRIMERS_ALIGN_TRIM as PRESTO_ALIGN_TRIM } from '../../modules/local/presto/presto_maskprimers_align_trim'
include { PRESTO_MASKPRIMERS_EXTRACT } from '../../modules/local/presto/presto_maskprimers_extract'
include { PRESTO_MASKPRIMERS_ALIGN as PRESTO_ALIGN_CREGION } from '../../modules/local/presto/presto_maskprimers_align'
include { PRESTO_PAIRSEQ as PRESTO_PAIRSEQ_UMI } from '../../modules/local/presto/presto_pairseq'
Expand All @@ -20,6 +21,7 @@ include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_UMI} from '../../mo
include { PRESTO_BUILDCONSENSUS as PRESTO_BUILDCONSENSUS_ALIGN } from '../../modules/local/presto/presto_buildconsensus'
include { PRESTO_POSTCONSENSUS_PAIRSEQ as PRESTO_POSTCONSENSUS_PAIRSEQ_UMI } from '../../modules/local/presto/presto_postconsensus_pairseq'
include { PRESTO_ASSEMBLEPAIRS as PRESTO_ASSEMBLEPAIRS_UMI } from '../../modules/local/presto/presto_assemblepairs'
include { PRESTO_ASSEMBLEPAIRS_JOIN as PRESTO_ASSEMBLEPAIRS_JOIN_UMI } from '../../modules/local/presto/presto_assemblepairs_join'
include { PRESTO_ASSEMBLEPAIRS_SEQUENTIAL } from '../../modules/local/presto/presto_assemblepairs_sequential'
include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_COLLAPSE_UMI } from '../../modules/local/presto/presto_parseheaders'
include { PRESTO_PARSEHEADERS as PRESTO_PARSEHEADERS_CREGION } from '../../modules/local/presto/presto_parseheaders'
Expand All @@ -36,6 +38,7 @@ workflow PRESTO_UMI {
ch_reads // channel: [ val(meta), [ reads ] ]
ch_cprimers // channel: [ cprimers.fasta ]
ch_vprimers // channel: [ vprimers.fasta ]
ch_umilinker // channel: [ umi_linker.fasta ]
ch_adapter_fasta // channel: [ adapters.fasta ]
ch_internal_cregion // channel: [ internal_cregions.fasta ]
ch_igblast
Expand Down Expand Up @@ -135,6 +138,40 @@ workflow PRESTO_UMI {
ch_for_clustersets = PRESTO_PAIRSEQ_ALIGN.out.reads
ch_pairseq_logs = PRESTO_PAIRSEQ_ALIGN.out.logs

} else if (params.library_generation_method == 'specific_5p_race_umi') {

ch_reads_R1 = PRESTO_FILTERSEQ_UMI.out.reads
.map{ reads -> [reads[0], reads[1]] }.dump(tag: 'ch_reads_R1')

// trim any sequence in R1 that is before UMI pattern-race linker sequence
PRESTO_ALIGN_TRIM(
ch_reads_R1,
ch_umilinker.collect()
)

// Merge again R1 and R2 by sample ID.
ch_maskprimers_trim_reads_R1 = PRESTO_ALIGN_TRIM.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_maskprimers_trim_reads_R1')
ch_filterseq_umi_reads_R2 = PRESTO_FILTERSEQ_UMI.out.reads.map{ reads -> [reads[0].id, reads[0], reads[2]]}.dump(tag: 'ch_filterseq_umi_reads_R2')
ch_reads_for_maskprimers_umi = ch_maskprimers_trim_reads_R1.join(ch_filterseq_umi_reads_R2)
.map{ it -> [it[1], it[2], it[4]] }.dump(tag: 'ch_reads_for_maskprimers_umi')

PRESTO_MASKPRIMERS_UMI (
ch_reads_for_maskprimers_umi,
ch_cprimers.collect(),
ch_vprimers.collect()
)

ch_versions = ch_versions.mix(PRESTO_MASKPRIMERS_UMI.out.versions)
ch_maskprimers_logs = PRESTO_MASKPRIMERS_UMI.out.logs

// Pre-consensus pair
PRESTO_PAIRSEQ_UMI (
PRESTO_MASKPRIMERS_UMI.out.reads
)
ch_versions = ch_versions.mix(PRESTO_PAIRSEQ_UMI.out.versions)
ch_for_clustersets = PRESTO_PAIRSEQ_UMI.out.reads
ch_pairseq_logs = PRESTO_PAIRSEQ_UMI.out.logs

} else {

PRESTO_MASKPRIMERS_UMI (
Expand Down Expand Up @@ -208,6 +245,33 @@ workflow PRESTO_UMI {
ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.versions)
ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.reads
ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_SEQUENTIAL.out.logs
} else if (params.assemblepairs_join) {
// Assemble read pairs align and get failed reads
PRESTO_ASSEMBLEPAIRS_UMI (
PRESTO_POSTCONSENSUS_PAIRSEQ_UMI.out.reads
)


// Merge R1 failed, R2 failed and assemblepairs pass reads by sample ID.
ch_assemblepairs_fail_reads = PRESTO_ASSEMBLEPAIRS_UMI.out.reads_fail.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_assemblepairs_fail_reads')

ch_assemblepairs_pass_reads = PRESTO_ASSEMBLEPAIRS_UMI.out.reads.map{ reads -> [reads[0].id, reads[0], reads[1]]}.dump(tag: 'ch_assemblepairs_pass_reads')

ch_reads_for_assemblepairs_join_umi = ch_assemblepairs_fail_reads.join(ch_assemblepairs_pass_reads)
.map{ it -> [it[1], it[2][0], it[2][1], it[4]] }.dump(tag: 'ch_reads_for_assemblepairs_join_umi')

// Rescue non-overlapping read pairs by joining them end to end
PRESTO_ASSEMBLEPAIRS_JOIN_UMI (
ch_reads_for_assemblepairs_join_umi
)

ch_versions = ch_versions.mix(PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.versions)
ch_assemblepairs_reads = PRESTO_ASSEMBLEPAIRS_JOIN_UMI.out.reads


// NOTE: these logs come from the AssemblePairs align step only; they do not include the number of rescued reads
ch_assemblepairs_logs = PRESTO_ASSEMBLEPAIRS_UMI.out.logs

} else {
// Assemble read pairs align
PRESTO_ASSEMBLEPAIRS_UMI (
Expand Down
Loading
Loading