Skip to content

Commit

Permalink
Merge pull request nf-core#1018 from scarlhoff/dsl2_ref_sheet
Browse files Browse the repository at this point in the history
DSL2: Pull input files from reference sheet columns
  • Loading branch information
scarlhoff authored Oct 27, 2023
2 parents bc9c7b5 + 6e2abeb commit 6cb0c30
Show file tree
Hide file tree
Showing 15 changed files with 387 additions and 126 deletions.
8 changes: 4 additions & 4 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ process {
// READ MAPPING
//
withName: BWA_ALN {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
ext.args = { "-n ${params.mapping_bwaaln_n} -k ${params.mapping_bwaaln_k} -l ${params.mapping_bwaaln_l} -o ${params.mapping_bwaaln_o}" }
ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
publishDir = [
Expand All @@ -384,7 +384,7 @@ process {
}

withName: 'BWA_SAMSE|BWA_SAMPE' {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
ext.args = { "-r '@RG\\tID:ILLUMINA-${meta.library_id}\\tSM:${meta.sample_id}\\tPL:illumina\\tPU:ILLUMINA-${meta.library_id}-${meta.strandedness}'" }
ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
publishDir = [
Expand All @@ -403,7 +403,7 @@ process {
}

withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" }
ext.args = { params.fasta_largeref ? "-c" : "" }
ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" }
publishDir = [
Expand Down Expand Up @@ -828,7 +828,7 @@ process {
]
}

withName: "QUALIMAP_BAMQC" {
withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' {
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" }
publishDir = [
path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/${meta.sample_id}/}" },
Expand Down
3 changes: 3 additions & 0 deletions conf/test_humanbam.config
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ params {
contamination_estimation_angsd_mapq = 0
contamination_estimation_angsd_minq = 0

// Qualimap
snpcapture_bed = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'

// TODO Reactivate sexDet and genotyping params when those steps get implemented.
// //Sex Determination
// sexdeterrmine_bedfile = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/reference/Human/1240K.pos.list_hs37d5.0based.bed.gz'
Expand Down
2 changes: 1 addition & 1 deletion conf/test_multiref.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ params {
input = 'https://github.com/nf-core/test-datasets/raw/eager/testdata/Mammoth/samplesheet_multilane_multilib.tsv'

// Genome references
fasta = 'https://github.com/jfy133/nf-core-test-datasets/raw/eager/reference/reference_sheet_multiref.csv'
fasta = 'https://github.com/nf-core/test-datasets/raw/eager/reference/reference_sheet_multiref.csv'

// BAM filtering
run_bamfiltering = true
Expand Down
7 changes: 6 additions & 1 deletion docs/development/manual_tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,11 @@ Tool Specific combinations
- with stricter threshold

- BAM trimming

- with default parameters
- different length by udg treatment

- All together
- All together

### Multi-reference tests

Expand Down Expand Up @@ -145,6 +146,10 @@ nextflow run ../main.nf -profile singularity,test --outdir ./results --input sam
## Test: (11) Broken path correctly causes the pipeline to fail ✅
## Expect: Pipeline fails
nextflow run ../main.nf -profile singularity,test --outdir ./results --input samplesheet.tsv --fasta reference_sheet_multiref_test11.csv -ansi-log false -dump-channels --save_reference

## Test: File input via reference sheet
## Expect: For hs37d5: Qualimap (with BED file), mtnucratio and ANGSD run successfully, and bedtools is not run. For Mammoth_MT: Qualimap (without BED file), mtnucratio and bedtools run successfully, and ANGSD is not run.
nextflow run main.nf -profile test_multiref,docker --outdir ./results --run_bedtools_coverage --run_contamination_estimation_angsd --run_mtnucratio
```

### AdapterRemoval
Expand Down
2 changes: 1 addition & 1 deletion modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@
},
"fastq_align_bwaaln": {
"branch": "master",
"git_sha": "a9784afdd5dcda23b84e64db75dc591065d64653",
"git_sha": "e2c81fea3daeacfa190f78d2b82f82361b734507",
"installed_by": ["subworkflows"]
}
}
Expand Down
2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ params {
fasta_dict = null
fasta_mapperindexdir = null
fasta_circular_target = null
fasta_mitochondrion_header = null
fasta_largeref = false

// References
Expand Down Expand Up @@ -219,6 +218,7 @@ try {
// Additional configs for subworkflows
includeConfig 'subworkflows/nf-core/bam_split_by_region/nextflow.config'
includeConfig 'subworkflows/nf-core/bam_docounts_contamination_angsd/nextflow.config'
includeConfig 'subworkflows/nf-core/fastq_align_bwaaln/nextflow.config'

profiles {
debug {
Expand Down
6 changes: 0 additions & 6 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,6 @@
"description": "Specify the FASTA header of the target chromosome to extend. Only applies when using `circularmapper`.",
"help_text": "The entry (chromosome, contig, etc.) in your FASTA reference that you'd like to be treated as circular.\n\nApplies only when providing a single FASTA file via `--fasta` (NOT multi-reference input - see reference TSV/CSV input).\n\n> Modifies tool parameter(s):\n> - circulargenerator `-s`\n",
"fa_icon": "fas fa-bullseye"
},
"fasta_mitochondrion_header": {
"type": "string",
"fa_icon": "fas fa-tag",
"description": "Specify the name of the reference FASTA entry corresponding to the mitochondrial genome, up to the first space. Only applies when using `--run_mtnucratio`.",
"help_text": "Specify the FASTA entry in the reference file specified as `--fasta` that acts as the mitochondrial 'chromosome' to base a mitochondrial-to-nuclear ratio calculation on. \n\nThe tool only accepts the first section of the header before the first space. For example, mitochondrion chromosome name is `MT` for the hs37d5/GrCH37 human reference genome.\n"
}
}
},
Expand Down
51 changes: 34 additions & 17 deletions subworkflows/local/map.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
// Map reads against the reference genome(s) with the selected mapping tool
//

include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main'
include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main'
include { FASTQ_ALIGN_BWAALN } from '../../subworkflows/nf-core/fastq_align_bwaaln/main'
include { BWA_MEM } from '../../modules/nf-core/bwa/mem/main'
include { BOWTIE2_ALIGN } from '../../modules/nf-core/bowtie2/align/main'
include { SAMTOOLS_MERGE as SAMTOOLS_MERGE_LANES } from '../../modules/nf-core/samtools/merge/main'
include { SAMTOOLS_SORT as SAMTOOLS_SORT_MERGED_LANES } from '../../modules/nf-core/samtools/sort/main'
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_MEM; SAMTOOLS_INDEX as SAMTOOLS_INDEX_BT2; SAMTOOLS_INDEX as SAMTOOLS_INDEX_MERGED_LANES } from '../../modules/nf-core/samtools/index/main'
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main'
include { SAMTOOLS_FLAGSTAT as SAMTOOLS_FLAGSTAT_MAPPED } from '../../modules/nf-core/samtools/flagstat/main'

workflow MAP {
take:
Expand All @@ -19,24 +19,32 @@ workflow MAP {
ch_versions = Channel.empty()
ch_multiqc_files = Channel.empty()

ch_input_for_mapping = reads
.combine(index)
.multiMap {
meta, reads, meta2, index ->
new_meta = meta.clone()
new_meta.reference = meta2.id
reads: [ new_meta, reads ]
index: [ meta2, index]
}

if ( params.mapping_tool == 'bwaaln' ) {
FASTQ_ALIGN_BWAALN ( ch_input_for_mapping.reads, ch_input_for_mapping.index )
ch_index_for_mapping = index
ch_reads_for_mapping = reads

FASTQ_ALIGN_BWAALN ( ch_reads_for_mapping, ch_index_for_mapping )
ch_versions = ch_versions.mix ( FASTQ_ALIGN_BWAALN.out.versions.first() )
ch_mapped_lane_bam = FASTQ_ALIGN_BWAALN.out.bam
.map{
// create meta consistent with rest of workflow
meta, bam ->
new_meta = meta + [ reference: meta.id_index ]
[ new_meta, bam ]
}

ch_mapped_lane_bai = params.fasta_largeref ? FASTQ_ALIGN_BWAALN.out.csi : FASTQ_ALIGN_BWAALN.out.bai

} else if ( params.mapping_tool == 'bwamem' ) {
ch_input_for_mapping = reads
.combine( index )
.multiMap {
meta, reads, meta2, index ->
new_meta = meta + [ reference: meta2.id ]
reads: [ new_meta, reads ]
index: [ meta2, index ]
}

BWA_MEM ( ch_input_for_mapping.reads, ch_input_for_mapping.index, true )
ch_versions = ch_versions.mix ( BWA_MEM.out.versions.first() )
ch_mapped_lane_bam = BWA_MEM.out.bam
Expand All @@ -46,6 +54,15 @@ workflow MAP {
ch_mapped_lane_bai = params.fasta_largeref ? SAMTOOLS_INDEX_MEM.out.csi : SAMTOOLS_INDEX_MEM.out.bai

} else if ( params.mapping_tool == 'bowtie2' ) {
ch_input_for_mapping = reads
.combine( index )
.multiMap {
meta, reads, meta2, index ->
new_meta = meta + [ reference: meta2.id ]
reads: [ new_meta, reads ]
index: [ meta2, index ]
}

BOWTIE2_ALIGN ( ch_input_for_mapping.reads, ch_input_for_mapping.index, false, true )
ch_versions = ch_versions.mix ( BOWTIE2_ALIGN.out.versions.first() )
ch_mapped_lane_bam = BOWTIE2_ALIGN.out.bam
Expand Down
62 changes: 60 additions & 2 deletions subworkflows/local/reference_indexing.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

include { REFERENCE_INDEXING_SINGLE } from '../../subworkflows/local/reference_indexing_single.nf'
include { REFERENCE_INDEXING_MULTI } from '../../subworkflows/local/reference_indexing_multi.nf'
include { GUNZIP as GUNZIP_SNPBED } from '../../modules/nf-core/gunzip/main.nf'

workflow REFERENCE_INDEXING {
take:
Expand All @@ -17,20 +18,77 @@ workflow REFERENCE_INDEXING {

// Warn user if they've given a reference sheet that already includes fai/dict/mapper index etc.
if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (fasta_fai || fasta_dict || fasta_mapperindexdir)) log.warn("A TSV or CSV has been supplied to `--fasta` as well as e.g. `--fasta_fai`. --fasta CSV/TSV takes priority and --fasta_* parameters will be ignored.")
if ( ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) && (params.mitochondrion_header || params.contamination_estimation_angsd_hapmap || params.damage_manipulation_pmdtools_reference_mask || params.snpcapture_bed || params.mapstats_bedtools_featurefile )) log.warn("A TSV or CSV has been supplied to `--fasta` as well as individual reference-specific input files, e.g. `--contamination_estimation_angsd_hapmap`. Input files specified in the --fasta CSV/TSV take priority and other input parameters will be ignored.")

if ( fasta.extension == 'csv' || fasta.extension == 'tsv' ) {
// If input (multi-)reference sheet supplied
REFERENCE_INDEXING_MULTI ( fasta )
ch_reference_for_mapping = REFERENCE_INDEXING_MULTI.out.reference
ch_mitochondrion_header = REFERENCE_INDEXING_MULTI.out.mitochondrion_header
ch_hapmap = REFERENCE_INDEXING_MULTI.out.hapmap
ch_pmd_mask = REFERENCE_INDEXING_MULTI.out.pmd_mask
ch_snp_capture_bed = REFERENCE_INDEXING_MULTI.out.snp_capture_bed
ch_pileupcaller_snp = REFERENCE_INDEXING_MULTI.out.pileupcaller_snp
ch_sexdeterrmine_bed = REFERENCE_INDEXING_MULTI.out.sexdeterrmine_bed
ch_bedtools_feature = REFERENCE_INDEXING_MULTI.out.bedtools_feature
ch_versions = ch_versions.mix( REFERENCE_INDEXING_MULTI.out.versions )
} else {
// If input FASTA and/or indices supplied
REFERENCE_INDEXING_SINGLE ( fasta, fasta_fai, fasta_dict, fasta_mapperindexdir )
ch_mitochondrion_header = REFERENCE_INDEXING_SINGLE.out.mitochondrion_header
ch_hapmap = REFERENCE_INDEXING_SINGLE.out.hapmap
ch_pmd_mask = REFERENCE_INDEXING_SINGLE.out.pmd_mask
ch_snp_capture_bed = REFERENCE_INDEXING_SINGLE.out.snp_capture_bed
ch_pileupcaller_snp = REFERENCE_INDEXING_SINGLE.out.pileupcaller_snp
ch_sexdeterrmine_bed = REFERENCE_INDEXING_SINGLE.out.sexdeterrmine_bed
ch_bedtools_feature = REFERENCE_INDEXING_SINGLE.out.bedtools_feature
ch_reference_for_mapping = REFERENCE_INDEXING_SINGLE.out.reference
ch_versions = ch_versions.mix( REFERENCE_INDEXING_SINGLE.out.versions )
}

// Filter out input options that are not provided
ch_mitochondrion_header = ch_mitochondrion_header
.filter{ it[1] != "" }

ch_hapmap = ch_hapmap
.filter{ it[1] != "" }

ch_pmd_mask = ch_pmd_mask
.filter{ it[1] != "" && it[2] != "" }

ch_capture_bed = ch_snp_capture_bed //optional
.branch {
meta, capture_bed ->
input: capture_bed != ""
skip: true
}
ch_capture_bed_gunzip = ch_capture_bed.input //unzip
.branch {
meta, capture_bed ->
forgunzip: capture_bed.extension == "gz"
skip: true
}
GUNZIP_SNPBED( ch_capture_bed_gunzip.forgunzip )
ch_capture_bed = GUNZIP_SNPBED.out.gunzip.mix( ch_capture_bed_gunzip.skip ).mix( ch_capture_bed.skip )

ch_pileupcaller_snp = ch_pileupcaller_snp
.filter{ it[1] != "" && it[2] != "" }

ch_sexdeterrmine_bed = ch_sexdeterrmine_bed
.filter{ it[1] != "" }

ch_bedtools_feature = ch_bedtools_feature
.filter{ it[1] != "" }

emit:
reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex ]
versions = ch_versions
reference = ch_reference_for_mapping // [ meta, fasta, fai, dict, mapindex, circular_target ]
mitochondrion_header = ch_mitochondrion_header // [ meta, mitochondrion_header ]
hapmap = ch_hapmap // [ meta, hapmap ]
pmd_mask = ch_pmd_mask // [ meta, masked_fasta, capture_bed ]
snp_capture_bed = ch_capture_bed // [ meta, capture_bed ]
pileupcaller_snp = ch_pileupcaller_snp // [ meta, pileupcaller_bed, pileupcaller_snp ]
sexdeterrmine_bed = ch_sexdeterrmine_bed // [ meta, sexdet_bed ]
bedtools_feature = ch_bedtools_feature // [ meta, bedtools_feature ]
versions = ch_versions

}
Loading

0 comments on commit 6cb0c30

Please sign in to comment.