diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf index 244a42ccf05..ba0ec1e3296 100644 --- a/modules/nf-core/bcftools/concat/main.nf +++ b/modules/nf-core/bcftools/concat/main.nf @@ -8,23 +8,32 @@ process BCFTOOLS_CONCAT { 'biocontainers/bcftools:1.17--haef29d1_0' }" input: - tuple val(meta), path(vcfs), path(tbi) + tuple val(meta), path(vcfs), path(tbis) + path(bed) output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.${extension}") , emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" + def regions = bed ? "--regions-file ${bed} --allow-overlaps" : '' // --allow-overlaps is required for bcftools concat to work with bed files + + extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" """ bcftools concat \\ - --output ${prefix}.vcf.gz \\ - $args \\ - --threads $task.cpus \\ + --output ${prefix}.${extension} \\ + ${args} \\ + ${regions} \\ + --threads ${task.cpus} \\ ${vcfs} cat <<-END_VERSIONS > versions.yml @@ -34,9 +43,16 @@ process BCFTOOLS_CONCAT { """ stub: - prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" """ - touch ${prefix}.vcf.gz + touch ${prefix}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml index 8731b17bc8c..e670a4bf23b 100644 --- a/modules/nf-core/bcftools/concat/meta.yml +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -20,15 +20,20 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - vcfs: - type: files + type: list description: | List containing 2 or more vcf files e.g. [ 'file1.vcf', 'file2.vcf' ] - - tbi: - type: files + - tbis: + type: list description: | List containing 2 or more index files (optional) e.g. [ 'file1.tbi', 'file2.tbi' ] + - bed: + type: file + description: | + A BED file containing the regions to be concatenated (optional) + output: - meta: type: map @@ -38,7 +43,7 @@ output: - vcf: type: file description: VCF concatenated output file - pattern: "*.{vcf.gz}" + pattern: "*.{vcf.gz,vcf,bcf,bcf.gz}" - versions: type: file description: File containing software versions diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf index 91cace4d1d9..235675c4809 100644 --- a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf +++ b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf @@ -142,7 +142,8 @@ workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF { .map { it + [[]] } BCFTOOLS_CONCAT( - ch_concat_input + ch_concat_input, + [] ) ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first()) diff --git a/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/subworkflows/nf-core/vcf_gather_bcftools/main.nf index 88f6de045fc..57fc72c28f9 100644 --- a/subworkflows/nf-core/vcf_gather_bcftools/main.nf +++ b/subworkflows/nf-core/vcf_gather_bcftools/main.nf @@ -5,8 +5,9 @@ include { TABIX_TABIX } from '../../../modules/nf-core/tabix/tabix/main' workflow VCF_GATHER_BCFTOOLS { take: - ch_vcfs // 
channel: [ meta, vcf, tbi ] - ch_scatter_output // channel: [ meta, bed, gather_count ] => output from the scatter subworkflow, if you didn't use this subworkflow you can just use `[]` as bed since it isn't used + ch_vcfs // channel: [ val(meta), path(vcf), path(tbi) ] + ch_scatter_count // channel: [ val(meta), val(gather_count) ] => The scatter count per group of input files + ch_bed // channel: [ path(bed) ] => The BED file to be used by bcftools concat val_common_meta // string: The name of the meta field that should become the new id val_sort // boolean: Whether or not the output file should be sorted !! Add the config when using sort !! @@ -14,18 +15,18 @@ workflow VCF_GATHER_BCFTOOLS { ch_versions = Channel.empty() - ch_concat_input = ch_vcfs.join(ch_scatter_output) - .map{ meta, vcf, tbi, bed, gather_count -> - meta = val_common_meta ? meta + [id:meta[val_common_meta]] : meta - [ groupKey(meta, gather_count), vcf, tbi ] + ch_concat_input = ch_vcfs.join(ch_scatter_count) + .map{ meta, vcf, tbi, gather_count -> + def new_meta = val_common_meta ? 
meta + [id:meta[val_common_meta]] : meta // 'def' keeps new_meta local to this closure; an undeclared name would be shared between invocations + [ groupKey(new_meta, gather_count), vcf, tbi ] }.groupTuple() - BCFTOOLS_CONCAT ( ch_concat_input ) - ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) + BCFTOOLS_CONCAT ( ch_concat_input, ch_bed ) + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first()) if (val_sort) { BCFTOOLS_SORT(BCFTOOLS_CONCAT.out.vcf) - ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) + ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first()) ch_tabix_input = BCFTOOLS_SORT.out.vcf @@ -34,11 +35,11 @@ workflow VCF_GATHER_BCFTOOLS { } TABIX_TABIX ( ch_tabix_input ) - ch_versions = ch_versions.mix(TABIX_TABIX.out.versions) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) emit: - vcf = ch_tabix_input // channel: [ val(meta), [ vcf ] ] - tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] + vcf = ch_tabix_input // channel: [ val(meta), path(vcf) ] + tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), path(tbi) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/vcf_gather_bcftools/meta.yml b/subworkflows/nf-core/vcf_gather_bcftools/meta.yml index 1e2240b159c..58d38bf44a8 100644 --- a/subworkflows/nf-core/vcf_gather_bcftools/meta.yml +++ b/subworkflows/nf-core/vcf_gather_bcftools/meta.yml @@ -15,52 +15,40 @@ components: - bcftools/concat - tabix/tabix input: - - meta: - type: map - description: | - Groovy Map containing at least two fields: 'id' and a common field for each VCF that needs to be merged - e.g. 
[ id:'test.001', common_meta:'test' ] - ch_vcfs: - type: file(s) description: | VCF files and their indices that should be concatenated - Structure: [ meta, vcf, tbi ] - - ch_scatter_output: - type: file and integer + Structure: [ val(meta), path(vcf), path(tbi) ] + - ch_scatter_count: + description: | + A channel containing the scatter count for each input sample + Structure: [ val(meta), val(gather_count) ] + - ch_bed: + type: file(s) description: | - The output created from a scatter subworkflow (contains a BED file and scatter_count). - The BED file isn't actually used in the pipeline so can remain empty if you manually set this input channel - Structure: [ meta, bed, scatter_count ] - - common_meta: + The BED file to be used by bcftools concat + Structure: [ path(bed) ] + - val_common_meta: type: string description: | OPTIONAL: The string of the common meta to use as the new 'id'. Please make sure all VCFs that need to be concatenated have the same value in the the meta field specified. (and that only the ID is different between these files) - - sort: + - val_sort: type: boolean description: | Whether or not to sort the output VCF, this can be useful if this subworkflow isn't used in a scatter/gather workflow output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test' ] - vcf: - type: file description: | The concatenated (and possible sorted) VCF file - Structure: [ meta, vcf ] - pattern: "*.vcf.gz" + Structure: [ val(meta), path(vcf) ] - tbi: - type: file description: | The indices of the output VCFs - Structure: [ meta, tbi ] - pattern: "*.vcf.gz.tbi" + Structure: [ val(meta), path(tbi) ] - versions: type: file description: File containing software versions diff --git a/tests/modules/nf-core/bcftools/concat/main.nf b/tests/modules/nf-core/bcftools/concat/main.nf index 582af6db550..cc570dd1b41 100644 --- a/tests/modules/nf-core/bcftools/concat/main.nf +++ b/tests/modules/nf-core/bcftools/concat/main.nf @@ -4,25 +4,40 @@ nextflow.enable.dsl = 2 include { BCFTOOLS_CONCAT } from '../../../../../modules/nf-core/bcftools/concat/main.nf' -workflow test_bcftools_concat_tbi { +workflow test_bcftools_concat { - input = [ [ id:'test3' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ], - [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) ] - ] - - BCFTOOLS_CONCAT ( input ) + input = [ + [ id:'test' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [ // indices, in the same order as the VCFs above + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) + ] + ] + + bed = [] + + BCFTOOLS_CONCAT ( input, bed ) } -workflow test_bcftools_concat_no_tbi { +workflow test_bcftools_concat_bed { - input = [ [ id:'test3' ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], 
checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ], - [] - ] + input = [ + [ id:'test' ], // meta map + [ + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true) + ], + [ // indices, in the same order as the VCFs above + file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true) + ] + ] - BCFTOOLS_CONCAT ( input ) -} + bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + + BCFTOOLS_CONCAT ( input, bed ) +} diff --git a/tests/modules/nf-core/bcftools/concat/nextflow.config b/tests/modules/nf-core/bcftools/concat/nextflow.config index 3f0d064a7bc..5b8a99a4cad 100644 --- a/tests/modules/nf-core/bcftools/concat/nextflow.config +++ b/tests/modules/nf-core/bcftools/concat/nextflow.config @@ -3,7 +3,7 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } withName: BCFTOOLS_CONCAT { - ext.args = '--no-version' + ext.args = '--no-version --output-type z' } } diff --git a/tests/modules/nf-core/bcftools/concat/test.yml b/tests/modules/nf-core/bcftools/concat/test.yml index e587598fb92..2c3002b0410 100644 --- a/tests/modules/nf-core/bcftools/concat/test.yml +++ b/tests/modules/nf-core/bcftools/concat/test.yml @@ -1,25 +1,19 @@ -- name: bcftools concat test_bcftools_concat_tbi - command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_tbi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/concat/nextflow.config +- name: bcftools concat test_bcftools_concat + command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat -c ./tests/config/nextflow.config -c 
./tests/modules/nf-core/bcftools/concat/nextflow.config tags: - bcftools - bcftools/concat files: - - path: output/bcftools/test3.vcf.gz - md5sum: 18c1612343f5e8a219ee6476a870a674 + - path: output/bcftools/test.vcf.gz + md5sum: 4bcd0afd89f56c5d433f6b6abc44d0a6 + - path: output/bcftools/versions.yml -- name: bcftools concat test_bcftools_concat_no_tbi - command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_no_tbi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/concat/nextflow.config +- name: bcftools concat test_bcftools_concat_bed + command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_bed -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/concat/nextflow.config tags: - bcftools - bcftools/concat files: - - path: output/bcftools/test3.vcf.gz - md5sum: 18c1612343f5e8a219ee6476a870a674 - -- name: bcftools concat test_bcftools_concat_tbi_stub - command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_tbi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/concat/nextflow.config -stub - tags: - - bcftools - - bcftools/concat - files: - - path: output/bcftools/test3.vcf.gz + - path: output/bcftools/test.vcf.gz + md5sum: 094920f3d5aac3d273c94f93a0e76fd1 + - path: output/bcftools/versions.yml diff --git a/tests/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/nextflow.config b/tests/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/nextflow.config index fa4f5b43091..8ce63482526 100644 --- a/tests/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/nextflow.config +++ b/tests/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/nextflow.config @@ -17,4 +17,13 @@ process { ].join(" ")} } + withName: SNPEFF_SNPEFF { + container = 'docker.io/nfcore/snpeff:5.0.WBcel235' + } + + withName: BCFTOOLS_CONCAT { + ext.prefix = { "${meta.id}_concat" } + ext.args = "--output-type z" + } + } diff --git 
a/tests/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/tests/subworkflows/nf-core/vcf_gather_bcftools/main.nf index 32d5939ba93..f6e9cf97f3e 100644 --- a/tests/subworkflows/nf-core/vcf_gather_bcftools/main.nf +++ b/tests/subworkflows/nf-core/vcf_gather_bcftools/main.nf @@ -19,12 +19,10 @@ workflow test_vcf_gather_bcftools { scatter = Channel.of([ [id:'test_1', sample:'test'], - [], 2 ], [ [id:'test_2', sample:'test'], - [], 2 ]) @@ -32,6 +30,7 @@ workflow test_vcf_gather_bcftools { VCF_GATHER_BCFTOOLS ( input, scatter, + [], 'sample', true ) @@ -52,19 +51,20 @@ workflow test_vcf_gather_bcftools_no_meta { scatter = Channel.of([ [id:'test'], - [], 2 ], [ [id:'test'], - [], 2 ]) + bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + VCF_GATHER_BCFTOOLS ( input, scatter, + bed, [], true ) diff --git a/tests/subworkflows/nf-core/vcf_gather_bcftools/nextflow.config b/tests/subworkflows/nf-core/vcf_gather_bcftools/nextflow.config index 823efbaa418..ed0eaf0caa4 100644 --- a/tests/subworkflows/nf-core/vcf_gather_bcftools/nextflow.config +++ b/tests/subworkflows/nf-core/vcf_gather_bcftools/nextflow.config @@ -2,8 +2,13 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + withName: ".*:VCF_GATHER_BCFTOOLS:BCFTOOLS_CONCAT" { + ext.args = "--output-type z" + } + withName: ".*:VCF_GATHER_BCFTOOLS:BCFTOOLS_SORT" { ext.prefix = {"${meta.id}.sorted"} + ext.args = "--output-type z" } }