nf-core · nvnieuwk · Jun 1, 2023 · Jun 2, 2023 · Jun 2, 2023 · Jun 2, 2023
diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf
@@ -8,23 +8,32 @@ process BCFTOOLS_CONCAT {
         'biocontainers/bcftools:1.17--haef29d1_0' }"
 
     input:
-    tuple val(meta), path(vcfs), path(tbi)
+    tuple val(meta), path(vcfs), path(tbis)
+    path(bed) // optional: an empty value (e.g. []) disables the --regions-file option below
 
     output:
-    tuple val(meta), path("*.gz"), emit: vcf
-    path  "versions.yml"         , emit: versions
+    tuple val(meta), path("*.${extension}") , emit: vcf
+    path  "versions.yml"                    , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
-    def args = task.ext.args   ?: ''
-    prefix   = task.ext.prefix ?: "${meta.id}"
+    def args = task.ext.args   ?: '--output-type z'
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def regions = bed ? "--regions-file ${bed} --allow-overlaps" : '' // --allow-overlaps is required for bcftools concat to work with bed files
+    // 'extension' is deliberately assigned without 'def': the output glob "*.${extension}" reads it
+    extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+                args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+                args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+                args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+                "vcf"
     """
     bcftools concat \\
-        --output ${prefix}.vcf.gz \\
-        $args \\
-        --threads $task.cpus \\
+        --output ${prefix}.${extension} \\
+        ${args} \\
+        ${regions} \\
+        --threads ${task.cpus} \\
         ${vcfs}
 
     cat <<-END_VERSIONS > versions.yml
@@ -34,9 +43,15 @@ process BCFTOOLS_CONCAT {
     """
 
     stub:
-    prefix   = task.ext.prefix ?: "${meta.id}"
+    def args = task.ext.args   ?: '--output-type z' // re-declared here: the 'def args' of the script block is not in scope in the stub
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+                args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+                args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+                args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+                "vcf"
     """
-    touch ${prefix}.vcf.gz
+    touch ${prefix}.${extension}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml
@@ -21,15 +21,20 @@ input:
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
   - vcfs:
-      type: files
+      type: list
       description: |
         List containing 2 or more vcf files
         e.g. [ 'file1.vcf', 'file2.vcf' ]
-  - tbi:
-      type: files
+  - tbis:
+      type: list
       description: |
         List containing 2 or more index files (optional)
         e.g. [ 'file1.tbi', 'file2.tbi' ]
+  - bed:
+      type: file
+      description: |
+        A BED file containing the regions to be concatenated (optional)
+
 output:
   - meta:
       type: map
@@ -39,7 +44,7 @@ output:
   - vcf:
       type: file
       description: VCF concatenated output file
-      pattern: "*.{vcf.gz}"
+      pattern: "*.{vcf.gz,vcf,bcf,bcf.gz}"
   - versions:
       type: file
       description: File containing software versions

diff --git a/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf b/subworkflows/nf-core/vcf_annotate_ensemblvep_snpeff/main.nf
@@ -142,7 +142,8 @@ workflow VCF_ANNOTATE_ENSEMBLVEP_SNPEFF {
             .map { it + [[]] }
 
         BCFTOOLS_CONCAT(
-            ch_concat_input
+            ch_concat_input,
+            [] // bed input left empty: BCFTOOLS_CONCAT then adds no --regions-file option
         )
         ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first())
 

diff --git a/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/subworkflows/nf-core/vcf_gather_bcftools/main.nf
@@ -5,27 +5,28 @@ include { TABIX_TABIX        } from '../../../modules/nf-core/tabix/tabix/main'
 workflow VCF_GATHER_BCFTOOLS {
 
     take:
-    ch_vcfs             // channel: [ meta, vcf, tbi ]
-    ch_scatter_output   // channel: [ meta, bed, gather_count ] => output from the scatter subworkflow, if you didn't use this subworkflow you can just use `[]` as bed since it isn't used
+    ch_vcfs             // channel: [ val(meta), path(vcf), path(tbi) ]
+    ch_scatter_count    // channel: [ val(meta), val(gather_count) ] => The scatter count per group of input files
+    ch_bed              // channel: [ path(bed) ] => The BED file to be used by bcftools concat (pass a value channel or a plain file so it is reused for every group; `[]` for none)
     val_common_meta     // string:  The name of the meta field that should become the new id
     val_sort            // boolean: Whether or not the output file should be sorted !! Add the config when using sort !!
 
     main:
 
     ch_versions = Channel.empty()
 
-    ch_concat_input = ch_vcfs.join(ch_scatter_output)
-        .map{ meta, vcf, tbi, bed, gather_count ->
-            meta = val_common_meta ? meta + [id:meta[val_common_meta]] : meta
-            [ groupKey(meta, gather_count), vcf, tbi ]
+    ch_concat_input = ch_vcfs.join(ch_scatter_count)
+        .map{ meta, vcf, tbi, gather_count ->
+            def new_meta = val_common_meta ? meta + [id:meta[val_common_meta]] : meta // 'def' keeps the variable local to this closure call
+            [ groupKey(new_meta, gather_count), vcf, tbi ] // groupKey carries the expected group size for groupTuple
         }.groupTuple()
 
-    BCFTOOLS_CONCAT ( ch_concat_input )
-    ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions)
+    BCFTOOLS_CONCAT ( ch_concat_input, ch_bed )
+    ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions.first())
 
     if (val_sort) {
         BCFTOOLS_SORT(BCFTOOLS_CONCAT.out.vcf)
-        ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions)
+        ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first())
 
         ch_tabix_input = BCFTOOLS_SORT.out.vcf
 
@@ -34,11 +35,11 @@ workflow VCF_GATHER_BCFTOOLS {
     }
 
     TABIX_TABIX ( ch_tabix_input )
-    ch_versions = ch_versions.mix(TABIX_TABIX.out.versions)
+    ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first())
 
     emit:
-    vcf      = ch_tabix_input        // channel: [ val(meta), [ vcf ] ]
-    tbi      = TABIX_TABIX.out.tbi   // channel: [ val(meta), [ tbi ] ]
+    vcf      = ch_tabix_input        // channel: [ val(meta), path(vcf) ]
+    tbi      = TABIX_TABIX.out.tbi   // channel: [ val(meta), path(tbi) ]
 
     versions = ch_versions           // channel: [ versions.yml ]
 }

diff --git a/subworkflows/nf-core/vcf_gather_bcftools/meta.yml b/subworkflows/nf-core/vcf_gather_bcftools/meta.yml
@@ -15,52 +15,40 @@ modules:
   - bcftools/concat
   - tabix/tabix
 input:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing at least two fields: 'id' and a common field for each VCF that needs to be merged
-        e.g. [ id:'test.001', common_meta:'test' ]
   - ch_vcfs:
-      type: file(s)
       description: |
         VCF files and their indices that should be concatenated
-        Structure: [ meta, vcf, tbi ]
-  - ch_scatter_output:
-      type: file and integer
+        Structure: [ val(meta), path(vcf), path(tbi) ]
+  - ch_scatter_count: # joined with ch_vcfs on meta; gather_count is passed to groupKey as the group size
+      description: |
+        A channel containing the scatter count for each input sample
+        Structure: [ val(meta), val(gather_count) ]
+  - ch_bed: # forwarded unchanged as the second input of BCFTOOLS_CONCAT
+      type: file(s)
       description: |
-        The output created from a scatter subworkflow (contains a BED file and scatter_count).
-        The BED file isn't actually used in the pipeline so can remain empty if you manually set this input channel
-        Structure: [ meta, bed, scatter_count ]
-  - common_meta:
+        The BED file to be used by bcftools concat
+        Structure: [ path(bed) ]
+  - val_common_meta: # named as in the subworkflow's take: block
       type: string
       description: |
         OPTIONAL:
         The string of the common meta to use as the new 'id'.
         Please make sure all VCFs that need to be concatenated have the same value in the
         the meta field specified. (and that only the ID is different between these files)
-  - sort:
+  - val_sort: # named as in the subworkflow's take: block
       type: boolean
       description: |
         Whether or not to sort the output VCF,
         this can be useful if this subworkflow isn't used in a scatter/gather workflow
 output:
-  - meta:
-      type: map
-      description: |
-        Groovy Map containing sample information
-        e.g. [ id:'test' ]
   - vcf:
-      type: file
       description: |
         The concatenated (and possible sorted) VCF file
-        Structure: [ meta, vcf ]
-      pattern: "*.vcf.gz"
+        Structure: [ val(meta), path(vcf) ]
   - tbi:
-      type: file
       description: |
         The indices of the output VCFs
-        Structure: [ meta, tbi ]
-      pattern: "*.vcf.gz.tbi"
+        Structure: [ val(meta), path(tbi) ]
   - versions:
       type: file
       description: File containing software versions

diff --git a/tests/modules/nf-core/bcftools/concat/main.nf b/tests/modules/nf-core/bcftools/concat/main.nf
@@ -6,23 +6,19 @@ include { BCFTOOLS_CONCAT } from '../../../../../modules/nf-core/bcftools/concat
 
 workflow test_bcftools_concat_tbi {
 
-    input = [ [ id:'test3' ], // meta map
-              [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
-                file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ],
-              [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true),
-                file(params.test_data['sarscov2']['illumina']['test2_vcf_gz_tbi'], checkIfExists: true) ]
-            ]
+    input = [
+        [ id:'test' ], // meta map
+        [
+          file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz'], checkIfExists: true),
+          file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz'], checkIfExists: true)
+        ],
+        [
+          file(params.test_data['homo_sapiens']['illumina']['test_haplotc_cnn_vcf_gz_tbi'], checkIfExists: true),
+          file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf_gz_tbi'], checkIfExists: true)
+        ]
+    ]
 
-    BCFTOOLS_CONCAT ( input )
-}
+    // No BED file in this test: the empty list passed below leaves --regions-file unset
 
-workflow test_bcftools_concat_no_tbi {
-
-    input = [ [ id:'test3' ], // meta map
-              [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true),
-                file(params.test_data['sarscov2']['illumina']['test2_vcf_gz'], checkIfExists: true) ],
-              []
-            ]
-
-    BCFTOOLS_CONCAT ( input )
-}
+    BCFTOOLS_CONCAT ( input, [] )
+}
diff --git a/tests/modules/nf-core/bcftools/concat/nextflow.config b/tests/modules/nf-core/bcftools/concat/nextflow.config
@@ -3,7 +3,7 @@ process {
     publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
 
     withName: BCFTOOLS_CONCAT {
-        ext.args = '--no-version'
+        ext.args = '--no-version --output-type z' // setting ext.args replaces the module default, so the output type is repeated to keep the .vcf.gz extension
     }
 
 }
diff --git a/tests/modules/nf-core/bcftools/concat/test.yml b/tests/modules/nf-core/bcftools/concat/test.yml
@@ -1,25 +1,9 @@
 - name: bcftools concat test_bcftools_concat_tbi
-  command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_tbi -c ./tests/config/nextflow.config  -c ./tests/modules/nf-core/bcftools/concat/nextflow.config
+  command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_tbi -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/bcftools/concat/nextflow.config
   tags:
     - bcftools
     - bcftools/concat
   files:
-    - path: output/bcftools/test3.vcf.gz
-      md5sum: 18c1612343f5e8a219ee6476a870a674
-
-- name: bcftools concat test_bcftools_concat_no_tbi
-  command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_no_tbi -c ./tests/config/nextflow.config  -c ./tests/modules/nf-core/bcftools/concat/nextflow.config
-  tags:
-    - bcftools
-    - bcftools/concat
-  files:
-    - path: output/bcftools/test3.vcf.gz
-      md5sum: 18c1612343f5e8a219ee6476a870a674
-
-- name: bcftools concat test_bcftools_concat_tbi_stub
-  command: nextflow run ./tests/modules/nf-core/bcftools/concat -entry test_bcftools_concat_tbi -c ./tests/config/nextflow.config  -c ./tests/modules/nf-core/bcftools/concat/nextflow.config -stub
-  tags:
-    - bcftools
-    - bcftools/concat
-  files:
-    - path: output/bcftools/test3.vcf.gz
+    - path: output/bcftools/test.vcf.gz # file name comes from meta id 'test' plus the vcf.gz extension
+      md5sum: 4bcd0afd89f56c5d433f6b6abc44d0a6
+    - path: output/bcftools/versions.yml # listed without an md5sum
diff --git a/tests/subworkflows/nf-core/vcf_gather_bcftools/main.nf b/tests/subworkflows/nf-core/vcf_gather_bcftools/main.nf
@@ -19,19 +19,18 @@ workflow test_vcf_gather_bcftools {
 
     scatter = Channel.of([
         [id:'test_1', sample:'test'],
-        [],
         2
     ],
     [
         [id:'test_2', sample:'test'],
-        [],
         2
     ])
 
 
     VCF_GATHER_BCFTOOLS (
         input,
         scatter,
+        [], // ch_bed: no BED file in this test
         'sample',
         true
     )
@@ -52,19 +51,20 @@ workflow test_vcf_gather_bcftools_no_meta {
 
     scatter = Channel.of([
         [id:'test'],
-        [],
         2
     ],
     [
         [id:'test'],
-        [],
         2
     ])
 
+    bed = file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) // BED file handed to the subworkflow as ch_bed
+
 
     VCF_GATHER_BCFTOOLS (
         input,
         scatter,
+        bed, // ch_bed
         [],
         true
     )