diff --git a/.gitignore b/.gitignore index 007cbf2..0e42d4d 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,11 @@ null/ .nf-test .nf-test* .nf-test/* + +.vscode +.vscode/* + +tests/unmergedgvcfs +tests/unmergedgvcfs/* +tests/input-full-ncgm.csv +conf/test_full_ncgm.config diff --git a/.nf-core.yml b/.nf-core.yml index 60f9cf0..474e44e 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -18,6 +18,7 @@ lint: - docs/images/nf-core-vcftomat_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml included_configs: false + actions_ci: false multiqc_config: - report_comment nextflow_config: @@ -30,7 +31,7 @@ lint: nf_core_version: 3.1.0 repository_type: pipeline template: - author: "Famke B\xE4uerle, Dorothy Ellis" + author: "Famke Bäuerle, Dorothy Ellis" description: Nextflow pipeline to convert (g)vcfs to matrices suitable for statistical analysis force: false @@ -43,4 +44,4 @@ template: - codespaces - fastqc - adaptivecard - version: 1.0.0dev + version: 1.1.0 diff --git a/CHANGELOG.md b/CHANGELOG.md index 655cef6..a21bb96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## v1.1.0 - Newton Puccoon - 08.01.2025 + +### Added + +- [#7](https://github.com/qbic-pipelines/vcftomat/pull/7) - samplenames to columns +- [#8](https://github.com/qbic-pipelines/vcftomat/pull/8) - concat for sample, label pairs + +### Fixed + +- [#5](https://github.com/qbic-pipelines/vcftomat/pull/5) - filename collision +- [#10](https://github.com/qbic-pipelines/vcftomat/pull/10) - prepare release 1.1.0 + ## v1.0.0 - Curie Purpureal - 16.12.2024 Initial release of qbic-pipelines/vcftomat, created with the [nf-core](https://nf-co.re/) template. diff --git a/README.md b/README.md index c4a78f1..0bd6998 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,11 @@ 1. 
Indexes (g.)vcf files ([`tabix`](http://www.htslib.org/doc/tabix.html)) 2. Converts g.vcf files to vcf with `genotypegvcf` ([`GATK`](https://gatk.broadinstitute.org/hc/en-us)) -3. Merges all vcfs from the same sample with `bcftools/merge` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html)) -4. Converts the (merged) vcfs to a matrix using a custom R script written by @ellisdoro ([`R`](https://www.r-project.org/)) -5. Collects all reports into a MultiQC report ([`MultiQC`](http://multiqc.info/)) +3. Concatenates all vcfs that have the same id and the same label with `bcftools/concat` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html)) +4. Changes the sample name in the vcf file to the label with `bcftools/reheader` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html)) - This can be turned off by adding `--rename false` to the `nextflow run` command. +5. Merges all vcfs from the same sample with `bcftools/merge` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html)) +6. Converts the (merged) vcfs to a matrix using a custom R script written by @ellisdoro ([`R`](https://www.r-project.org/)) +7. Collects all reports into a MultiQC report ([`MultiQC`](http://multiqc.info/)) ![](./docs/images/vcftomat.excalidraw.png) @@ -32,13 +34,14 @@ First, prepare a samplesheet with your input data that looks as follows: `samplesheet.csv`: ```csv -sample,gvcf,vcf_path,vcf_index_path -SAMPLE-1,false,path/to/vcf.gz,path/to/.vcf.gz.tbi -SAMPLE-1,false,path/to/vcf.gz,path/to/.vcf.gz.tbi -SAMPLE-2,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi +sample,label,gvcf,vcf_path,vcf_index_path +SAMPLE-1,pipelineA-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi +SAMPLE-1,pipelineB-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi +SAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi +SAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi ``` -Each row represents a VCF file coming from a sample. 
The `gvcf` column indicates whether the file is a g.vcf file or not. The `vcf_path` and `vcf_index_path` columns contain the path to the VCF file and its index, respectively. +Each row represents a VCF file coming from a sample. The `label` column enables concatenation of vcfs (for example when the pipeline produces different vcfs for chrM and chrY). The `gvcf` column indicates whether the file is a g.vcf file or not. The `vcf_path` and `vcf_index_path` columns contain the path to the VCF file and its index, respectively. Now, you can run the pipeline using: diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 59cb191..b1d5749 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,6 +1,6 @@ report_comment: > - This report has been generated by the qbic-pipelines/vcftomat - analysis pipeline. + This report has been generated by the qbic-pipelines/vcftomat analysis pipeline. report_section_order: "qbic-pipelines-vcftomat-methods-description": order: -1000 diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 2231c37..1551a8c 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,4 +1,5 @@ -sample,gvcf,vcf_path,vcf_index_path -SAMPLE-1,false,path/to/vcf.gz,path/to/.vcf.gz.tbi -SAMPLE-1,false,path/to/vcf.gz,path/to/.vcf.gz.tbi -SAMPLE-2,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi +sample,label,gvcf,vcf_path,vcf_index_path +SAMPLE-1,pipelineA-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi +SAMPLE-1,pipelineB-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi +SAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi +SAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi diff --git a/assets/schema_input.json b/assets/schema_input.json index 13e869f..e64606b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,6 +13,12 @@ "errorMessage": "Sample name must be provided and cannot contain spaces", "meta": ["id"] }, + "label": { + "type": "string", + 
"pattern": "^\\S+$", + "errorMessage": "Label must be provided and cannot contain spaces", + "meta": ["label"] + }, "gvcf": { "type": "boolean", "errorMessage": "", @@ -40,6 +46,6 @@ "errorMessage": "Index of VCF file must have extension '.tbi'- Optional" } }, - "required": ["sample", "gvcf", "vcf_path"] + "required": ["sample", "label", "gvcf", "vcf_path"] } } diff --git a/conf/modules.config b/conf/modules.config index e1b4e63..1cf6704 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -23,13 +23,38 @@ process { } withName: 'GATK4_GENOTYPEGVCFS' { - ext.prefix = { "${input.baseName.tokenize('.')[0]}" } + ext.prefix = { "${meta.name}" } + } + + withName: 'BCFTOOLS_CONCAT' { + memory = 8.GB + ext.prefix = { "${meta.label}.concat" } + ext.args = { " --allow-overlaps --output-type z --write-index=tbi" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/bcftools/concat/" }, + ] + } + + withName: 'BCFTOOLS_REHEADER' { + beforeScript = { "echo ${meta.label} > ${meta.label}.txt" } + ext.args = { "--samples ${meta.label}.txt" } + ext.prefix = { "${meta.label}.reheader" } + ext.args2 = { "--output-type z --write-index=tbi" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/bcftools/reheader/" }, + ] } withName: 'BCFTOOLS_MERGE' { - memory = 8.GB - ext.args = { '--force-samples' } - ext.prefix = { "${meta.id}.merged" } + memory = 8.GB + ext.args = { "--force-samples --output-type z --write-index=tbi" } + ext.prefix = { "${meta.id}.merge" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/bcftools/merge/" }, + ] } withName: 'MULTIQC' { diff --git a/docs/images/vcftomat.excalidraw.png b/docs/images/vcftomat.excalidraw.png index aa81f82..543f8a0 100644 Binary files a/docs/images/vcftomat.excalidraw.png and b/docs/images/vcftomat.excalidraw.png differ diff --git a/docs/output.md b/docs/output.md index 55f881f..82c23e5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,14 +6,14 
@@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. - - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: - [Tabix](#tabix) - Indexes (g.)vcf files - [GenotypeGVCFs](#genotypegvcfs) - Converts g.vcf files to vcf with GATK +- [Concatenate VCFs](#concatenate-vcfs) - Concatenates all vcfs that have the same id and the same label with bcftools/concat +- [Rename Samples](#rename-samples) - Changes the sample name in the vcf file to the label with bcftools/reheader - [Merge VCFs](#merge-vcfs) - Merges all vcfs from the same sample with bcftools/merge - [Convert to matrix](#convert-to-matrix) - Converts the (merged) vcfs to a matrix using a custom R script written by @ellisdoro - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline @@ -21,12 +21,28 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d ### Tabix +Tabix generates index files with `.tbi` extension for all `(g).vcf` files that are given to the pipeline without index. + ### GenotypeGVCFs +The GATK GenotypeGVCFs module translates genotype (g) vcf files into classic vcf files. The key difference between a regular VCF and a GVCF is that the GVCF has records for all sites, whether there is a variant call there or not. + +### Concatenate VCFs + +Some variant calling pipelines will return multiple (g)VCF files for one patient. The `concat` command of `bcftools` is used to combine these VCFs into one VCF. + +### Rename Samples + +To enable the comparison of the finalized CSV files, `bcftools reheader` can be enabled to rename the variant sample name from the generic name given by the variant caller to a custom label given with the samplesheet. 
+ ### Merge VCFs +To enable comparison of different variant callers or variant calling pipelines, all VCFs that come from the same sample are merged based on the sample ID submitted by the user. + ### Convert to matrix +A custom R script is used to convert the finalized VCF to a CSV which can be used for further downstream analysis. The script was written by [Dorothy Ellis](https://github.com/ellisdoro). + ### MultiQC
diff --git a/docs/usage.md b/docs/usage.md index 16348df..9d8cefa 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -19,15 +19,17 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when the vcfs originate from the same bam but were yielded with different callers. The pipeline will merge all vcfs from the same sample into one vcf file but is also able to handle if there is only one vcf file for a sample (merging will then be skipped). ```csv title="samplesheet.csv" -sample,gvcf,vcf_path,vcf_index_path -SAMPLE-1,false,path/to/vcf.gz,path/to/.vcf.gz.tbi -SAMPLE-1,false,path/to/vcf.gz,path/to/.vcf.gz.tbi -SAMPLE-2,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi +sample,label,gvcf,vcf_path,vcf_index_path +SAMPLE-1,pipelineA-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi +SAMPLE-1,pipelineB-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi +SAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi +SAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi ``` | Column | Description | | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `sample` | Custom sample name. This entry will be identical for vcfs that originate from the same bam but were yielded with different callers. Spaces in sample names are automatically converted to underscores (`_`). | +| `label` | Label for the vcf file. This is used to concatenate vcfs with the same label. | | `gvcf` | Boolean whether the supplied sample is a gvcf (true) or a normal vcf (false). | | `vcf_path` | Full path to VCF file, should have the extension ".g.vcf.gz", ".vcf.gz", ".g.vcf" or ".vcf". | | `vcf_index_path` | Full path to index of (g)VCF file. Optional. Should have extension ".tbi". 
| @@ -39,7 +41,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p The typical command for running the pipeline is as follows: ```bash -nextflow run qbic-pipelines/vcftomat --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 -profile docker +nextflow run qbic-pipelines/vcftomat --input ./samplesheet.csv --outdir ./results --genome GATK.GRCh38 --rename true -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -69,10 +71,9 @@ nextflow run qbic-pipelines/vcftomat -profile docker -params-file params.yaml with: ```yaml title="params.yaml" -input: './samplesheet.csv' -outdir: './results/' -genome: 'GATK.GRCh38' -<...> +input: "./samplesheet.csv" +outdir: "./results/" +genome: "GATK.GRCh38" ``` You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). diff --git a/modules.json b/modules.json index 4e13864..ef0f8d3 100644 --- a/modules.json +++ b/modules.json @@ -5,11 +5,21 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "bcftools/concat": { + "branch": "master", + "git_sha": "d1e0ec7670fa77905a378627232566ce54c3c26d", + "installed_by": ["modules"] + }, "bcftools/merge": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, + "bcftools/reheader": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": ["modules"] + }, "gatk4/genotypegvcfs": { "branch": "master", "git_sha": "1999eff2c530b2b185a25cc42117a1686f09b685", diff --git a/modules/nf-core/bcftools/concat/environment.yml b/modules/nf-core/bcftools/concat/environment.yml new file mode 100644 index 0000000..5c00b11 --- /dev/null +++ b/modules/nf-core/bcftools/concat/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git 
a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf new file mode 100644 index 0000000..a94b28d --- /dev/null +++ b/modules/nf-core/bcftools/concat/main.nf @@ -0,0 +1,59 @@ +process BCFTOOLS_CONCAT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcfs), path(tbi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi"), emit: tbi, optional: true + tuple val(meta), path("${prefix}.vcf.gz.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def tbi_names = tbi.findAll { file -> !(file instanceof List) }.collect { file -> file.name } + def create_input_index = vcfs.collect { vcf -> tbi_names.contains(vcf.name + ".tbi") ? "" : "tabix ${vcf}" }.join("\n ") + """ + ${create_input_index} + + bcftools concat \\ + --output ${prefix}.vcf.gz \\ + $args \\ + --threads $task.cpus \\ + ${vcfs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") ? "tbi" : + args.contains("--write-index=csi") || args.contains("-W=csi") ? "csi" : + args.contains("--write-index") || args.contains("-W") ? "csi" : + "" + def create_index = index.matches("csi|tbi") ? 
"touch ${prefix}.vcf.gz.${index}" : "" + """ + echo "" | gzip > ${prefix}.vcf.gz + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml new file mode 100644 index 0000000..d2565b2 --- /dev/null +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -0,0 +1,83 @@ +name: bcftools_concat +description: Concatenate VCF files +keywords: + - variant calling + - concat + - bcftools + - VCF +tools: + - concat: + description: | + Concatenate VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbi: + type: list + description: | + List containing 2 or more index files (optional) + e.g. [ 'file1.tbi', 'file2.tbi' ] +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - ${prefix}.vcf.gz: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.{vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.tbi" + - ${prefix}.vcf.gz.tbi: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.tbi" + - csi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + pattern: "*.csi" + - ${prefix}.vcf.gz.csi: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + pattern: "*.csi" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@nvnieuwk" +maintainers: + - "@abhi18av" + - "@nvnieuwk" diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test b/modules/nf-core/bcftools/concat/tests/main.nf.test new file mode 100644 index 0000000..cb4642b --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test @@ -0,0 +1,316 @@ +nextflow_process { + + name "Test Process BCFTOOLS_CONCAT" + script "../main.nf" + process "BCFTOOLS_CONCAT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/concat" + + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]]") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + + test("homo_sapiens - [[vcf1, vcf2], []]") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + + } + + test("homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test3' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gvcf/test.genome.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/gatk/haplotypecaller_calls/test_haplotcaller.cnn.vcf.gz.tbi', checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + + } + + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/main.nf.test.snap b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap new file mode 100644 index 0000000..09e87cd --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/main.nf.test.snap @@ -0,0 +1,395 @@ +{ + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:11.178539482" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]]": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:08.765639958" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:21.607274757" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:27.332133878" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi" + ] + ], + [ + + ], + [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-09-26T11:03:36.575719606" + }, + "homo_sapiens - [[vcf1, vcf2], []]": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:54.069826178" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:02.45346063" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,5f6796c3ae109a1a5b87353954693f5a" + ] + ], + [ + + ], + [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:03:44.618596639" + }, + "homo_sapiens - [[vcf1, vcf2], [tbi1, tbi2]] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test3" + }, + 
"test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ], + "csi": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test3" + }, + "test3_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c6e19f105510a46af1c5da9064e2e659" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-26T11:04:19.745768656" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/nextflow.config b/modules/nf-core/bcftools/concat/tests/nextflow.config new file mode 100644 index 0000000..f3e1e98 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "--no-version" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/concat/tests/tags.yml b/modules/nf-core/bcftools/concat/tests/tags.yml new file mode 100644 index 0000000..21710d4 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/concat: + - "modules/nf-core/bcftools/concat/**" diff --git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index.config new file mode 100644 index 0000000..7dd696e --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config new file mode 100644 index 0000000..aebffb6 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff 
--git a/modules/nf-core/bcftools/concat/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_tbi.config new file mode 100644 index 0000000..b192ae7 --- /dev/null +++ b/modules/nf-core/bcftools/concat/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/reheader/environment.yml b/modules/nf-core/bcftools/reheader/environment.yml new file mode 100644 index 0000000..5c00b11 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/nf-core/bcftools/reheader/main.nf b/modules/nf-core/bcftools/reheader/main.nf new file mode 100644 index 0000000..9cf6d0d --- /dev/null +++ b/modules/nf-core/bcftools/reheader/main.nf @@ -0,0 +1,79 @@ +process BCFTOOLS_REHEADER { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(header), path(samples) + tuple val(meta2), path(fai) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.{csi,tbi}") , emit: index, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def fai_argument = fai ? "--fai $fai" : "" + def header_argument = header ? "--header $header" : "" + def samples_argument = samples ? "--samples $samples" : "" + + def args2 = task.ext.args2 ?: '--output-type z' + def extension = args2.contains("--output-type b") || args2.contains("-Ob") ? 
"bcf.gz" : + args2.contains("--output-type u") || args2.contains("-Ou") ? "bcf" : + args2.contains("--output-type z") || args2.contains("-Oz") ? "vcf.gz" : + args2.contains("--output-type v") || args2.contains("-Ov") ? "vcf" : + "vcf" + """ + bcftools \\ + reheader \\ + $fai_argument \\ + $header_argument \\ + $samples_argument \\ + $args \\ + --threads $task.cpus \\ + $vcf \\ + | bcftools view \\ + $args2 \\ + --output ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args2.contains("--output-type b") || args2.contains("-Ob") ? "bcf.gz" : + args2.contains("--output-type u") || args2.contains("-Ou") ? "bcf" : + args2.contains("--output-type z") || args2.contains("-Oz") ? "vcf.gz" : + args2.contains("--output-type v") || args2.contains("-Ov") ? "vcf" : + "vcf" + def index = args2.contains("--write-index=tbi") || args2.contains("-W=tbi") ? "tbi" : + args2.contains("--write-index=csi") || args2.contains("-W=csi") ? "csi" : + args2.contains("--write-index") || args2.contains("-W") ? "csi" : + "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/reheader/meta.yml b/modules/nf-core/bcftools/reheader/meta.yml new file mode 100644 index 0000000..47e5344 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/meta.yml @@ -0,0 +1,76 @@ +name: bcftools_reheader +description: Reheader a VCF file +keywords: + - reheader + - vcf + - update header +tools: + - reheader: + description: | + Modify header of VCF/BCF files, change sample names. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://samtools.github.io/bcftools/bcftools.html#reheader + doi: 10.1093/gigascience/giab008 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF/BCF file + pattern: "*.{vcf.gz,vcf,bcf}" + - header: + type: file + description: New header to add to the VCF + pattern: "*.{header.txt}" + - samples: + type: file + description: File containing sample names to update (one sample per line) + pattern: "*.{samples.txt}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Fasta index to update header sequences with + pattern: "*.{fai}" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF with updated header, bgzipped per default + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{csi,tbi}": + type: file + description: Index of VCF with updated header + pattern: "*.{csi,tbi}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@bjohnnyd" + - "@jemten" + - "@ramprasadn" +maintainers: + - "@bjohnnyd" + - "@jemten" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/reheader/tests/bcf.config b/modules/nf-core/bcftools/reheader/tests/bcf.config new file mode 100644 index 0000000..2b7dff5 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/bcf.config @@ -0,0 +1,4 @@ +process { + ext.args2 = { "--no-version --output-type b" } + ext.prefix = "tested" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/reheader/tests/main.nf.test b/modules/nf-core/bcftools/reheader/tests/main.nf.test new file mode 100644 index 0000000..96c1b7b --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/main.nf.test @@ -0,0 +1,394 @@ +nextflow_process { + + name "Test Process BCFTOOLS_REHEADER" + script "../main.nf" + process "BCFTOOLS_REHEADER" + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/reheader" + + test("sarscov2 - [vcf, [], []], fai - vcf output") { + + config "./vcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output") { + + config "./vcf.gz.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output - index") { + + config "./vcf_gz_index.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.index.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.index[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output - csi index") { + + config "./vcf_gz_index_csi.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.index.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.index[0][1].endsWith(".csi") } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output - tbi index") { + + config "./vcf_gz_index_tbi.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.index.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.versions + ).match() }, + { assert process.out.index[0][1].endsWith(".tbi") } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - bcf output") { + + config "./bcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, header, []], []") { + + config "./vcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], samples], fai") { + + config "./vcf.config" + when { + + process { + """ + ch_no_samples = Channel.of([ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ]) + ch_samples = Channel.of(["samples.txt", "new_name"]) + .collectFile(newLine:true) + input[0] = ch_no_samples.combine(ch_samples) + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - stub") { + + options "-stub" + config "./vcf.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.versions, + ).match() } + ) + } + + } + test("sarscov2 - [vcf, [], []], fai - vcf.gz output - index -stub") { + + options "-stub" + config "./vcf_gz_index.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + 
} + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output - csi index -stub") { + + options "-stub" + config "./vcf_gz_index_csi.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - [vcf, [], []], fai - vcf.gz output - tbi index -stub") { + + options "-stub" + config "./vcf_gz_index_tbi.config" + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/reheader/tests/main.nf.test.snap b/modules/nf-core/bcftools/reheader/tests/main.nf.test.snap new file mode 100644 index 0000000..87a3654 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/main.nf.test.snap @@ -0,0 +1,469 @@ +{ + "sarscov2 - [vcf, [], []], fai - vcf.gz output - tbi index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi" + ] + ], + [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T10:09:05.955833763" + }, 
+ "sarscov2 - [vcf, [], []], fai - vcf.gz output - index -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:52:41.444952182" + }, + "sarscov2 - [vcf, [], []], fai - vcf.gz output - tbi index -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:53:04.314827944" + }, + "sarscov2 - [vcf, [], []], fai - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + 
"versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:50:41.983008108" + }, + "sarscov2 - [vcf, [], []], fai - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.bcf.gz:md5,c8a304c8d2892039201154153c8cd536" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.bcf.gz:md5,c8a304c8d2892039201154153c8cd536" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:51:43.072513252" + }, + "sarscov2 - [vcf, [], []], fai - vcf.gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:50:53.055630152" + }, + "sarscov2 - [vcf, [], []], fai - vcf.gz output - index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + ], + 
"meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T10:08:37.999924355" + }, + "sarscov2 - [vcf, [], []], fai - vcf.gz output - csi index -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:52:52.512269206" + }, + "sarscov2 - [vcf, [], []], fai - stub": { + "content": [ + "tested.vcf", + [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-31T15:16:36.337112514" + }, + "sarscov2 - [vcf, [], []], fai - vcf.gz output - csi index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T10:08:55.434831174" + }, + "sarscov2 - [vcf, [], samples], fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,c64c373c10b0be24b29d6f18708ec1e8" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + + 
], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,c64c373c10b0be24b29d6f18708ec1e8" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:52:12.216002665" + }, + "sarscov2 - [vcf, header, []], []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,3189bc9a720d5d5d3006bf72d91300cb" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ], + "index": [ + + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "tested.vcf:md5,3189bc9a720d5d5d3006bf72d91300cb" + ] + ], + "versions": [ + "versions.yml:md5,486e3d4ebc1dbf5c0a4dfaebae12ea34" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-09-03T09:51:54.062386022" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/reheader/tests/tags.yml b/modules/nf-core/bcftools/reheader/tests/tags.yml new file mode 100644 index 0000000..c252941 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/reheader: + - modules/nf-core/bcftools/reheader/** diff --git a/modules/nf-core/bcftools/reheader/tests/vcf.config b/modules/nf-core/bcftools/reheader/tests/vcf.config new file mode 100644 index 0000000..820f2ae --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf.config @@ -0,0 +1,4 @@ +process { + ext.args2 = { "--no-version" } + ext.prefix = "tested" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/reheader/tests/vcf.gz.config b/modules/nf-core/bcftools/reheader/tests/vcf.gz.config new file mode 100644 index 0000000..c3031c3 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf.gz.config @@ -0,0 +1,4 @@ +process { + ext.args2 = { "--no-version --output-type z" } + ext.prefix = "tested" +} \ No newline at end of file diff --git 
a/modules/nf-core/bcftools/reheader/tests/vcf_gz_index.config b/modules/nf-core/bcftools/reheader/tests/vcf_gz_index.config new file mode 100644 index 0000000..1e050ec --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args2 = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/reheader/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/reheader/tests/vcf_gz_index_csi.config new file mode 100644 index 0000000..536e4b4 --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args2 = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/reheader/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/reheader/tests/vcf_gz_index_tbi.config new file mode 100644 index 0000000..91a80db --- /dev/null +++ b/modules/nf-core/bcftools/reheader/tests/vcf_gz_index_tbi.config @@ -0,0 +1,5 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args2 = "--output-type z --write-index=tbi --no-version" + +} diff --git a/nextflow.config b/nextflow.config index bb03373..16515b3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,6 +11,7 @@ params { // Input options input = null + rename = true // References genome = null @@ -18,10 +19,10 @@ params { igenomes_ignore = false // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' multiqc_methods_description = null // Boilerplate options @@ -37,20 +38,21 @@ params { show_hidden = false version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' - trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options - 
config_profile_name = null - config_profile_description = null - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_contact = null - config_profile_url = null + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + config_profile_name = null + config_profile_description = null + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_contact = null + config_profile_url = null // Schema validation default options - validate_params = true + validate_params = true } // Load base.config by default for all pipelines includeConfig 'conf/base.config' +includeConfig 'conf/modules.config' profiles { debug { @@ -209,23 +211,21 @@ dag { manifest { name = 'qbic-pipelines/vcftomat' - author = """Famke Bäuerle, Dorothy Ellis""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ - // TODO nf-core: Update the field with the details of the contributors to your pipeline. 
New with Nextflow version 24.10.0 [ name: 'Famke Bäuerle', - affiliation: '', - email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') - orcid: '' + affiliation: 'University of Tuebingen', + email: 'famke.baeuerle@uni-tuebingen.de', + github: '@famosab', + contribution: ['author', 'maintainer'], // List of contribution types ('author', 'maintainer' or 'contributor') + orcid: '0000-0003-1387-0251' ], [ name: ' Dorothy Ellis', affiliation: '', email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') + github: '@ellisdoro', + contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') orcid: '' ], ] @@ -234,7 +234,7 @@ manifest { mainScript = 'main.nf' defaultBranch = 'master' nextflowVersion = '!>=24.04.2' - version = '1.0.0' + version = '1.1.0' doi = '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index e51d58a..6dc73b2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -23,6 +23,11 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row.", "fa_icon": "fas fa-file-csv" }, + "rename": { + "type": "boolean", + "default": true, + "description": "Renaming the sample names within the VCF file to the VCF filename." + }, "outdir": { "type": "string", "format": "directory-path", @@ -65,6 +70,14 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", "fa_icon": "far fa-file-code" }, + "fai": { + "type": "string", + "description": "Path to FASTA FAI genome index file." 
+ }, + "dict": { + "type": "string", + "description": "Path to genome dict file" + }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", @@ -241,13 +254,5 @@ { "$ref": "#/$defs/generic_options" } - ], - "properties": { - "fai": { - "type": "string" - }, - "dict": { - "type": "string" - } - } + ] } diff --git a/nf-test.config b/nf-test.config index b206777..7b60f87 100644 --- a/nf-test.config +++ b/nf-test.config @@ -14,5 +14,6 @@ config { plugins { load "nft-bam@0.5.0" load "nft-utils@0.0.3" + load "nft-vcf@1.0.7" } } diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 82d6934..d73a10f 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -19,10 +19,93 @@ ], "@graph": [ { - "@id": "#", - "@type": "Person", - "email": "", - "name": "Famke Ba\u0308uerle" + "@id": "./", + "@type": "Dataset", + "creativeWorkStatus": "Stable", + "datePublished": "2025-01-08T12:21:55+00:00", + "description": "# qbic-pipelines/vcftomat\n\n[![GitHub Actions CI Status](https://github.com/qbic-pipelines/vcftomat/actions/workflows/ci.yml/badge.svg)](https://github.com/qbic-pipelines/vcftomat/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/qbic-pipelines/vcftomat/actions/workflows/linting.yml/badge.svg)](https://github.com/qbic-pipelines/vcftomat/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/qbic-pipelines/vcftomat)\n\n## Introduction\n\n**qbic-pipelines/vcftomat** is a bioinformatics pipeline that processes g.vcf files to a matrix suitable for downstream analysis. The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps:\n\n1. Indexes (g.)vcf files ([`tabix`](http://www.htslib.org/doc/tabix.html))\n2. Converts g.vcf files to vcf with `genotypegvcf` ([`GATK`](https://gatk.broadinstitute.org/hc/en-us))\n3. Concatenates all vcfs that have the same id and the same label with `bcftools/concat` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html))\n4. Changes the sample name in the vcf file to the filename with `bcftools/reheader` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html)) - This can be turned off by adding `--rename false` to the `nextflow run` command.\n5. Merges all vcfs from the same sample with `bcftools/merge` ([`bcftools`](https://samtools.github.io/bcftools/bcftools.html))\n6. Converts the (merged) vcfs to a matrix using a custom R script written by @ellisdoro ([`R`](https://www.r-project.org/))\n7. Collects all reports into a MultiQC report ([`MultiQC`](http://multiqc.info/))\n\n![](./docs/images/vcftomat.excalidraw.png)\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n`samplesheet.csv`:\n\n```csv\nsample,label,gvcf,vcf_path,vcf_index_path\nSAMPLE-1,pipelineA-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi\nSAMPLE-1,pipelineB-callerA,false,path/to/vcf.gz,path/to/.vcf.gz.tbi\nSAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi\nSAMPLE-2,pipelineB-callerB,true,path/to/g.vcf.gz,path/to/g.vcf.gz.tbi\n```\n\nEach row represents a VCF file coming from a sample. The `label` column enables concatenation of vcfs (for example when the pipeline produces different vcfs for chrM and chrY). The `gvcf` column indicates whether the file is a g.vcf file or not. The `vcf_path` and `vcf_index_path` columns contain the path to the VCF file and its index, respectively.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run qbic-pipelines/vcftomat \\\n -profile \\\n --input samplesheet.csv \\\n --genome GATK.GRCh38 \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nqbic-pipelines/vcftomat was originally written by Famke B\u00e4uerle, Dorothy Ellis.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "hasPart": [ + { + "@id": "main.nf" + }, + { + "@id": "assets/" + }, + { + "@id": "bin/" + }, + { + "@id": "conf/" + }, + { + "@id": "docs/" + }, + { + "@id": "docs/images/" + }, + { + "@id": "modules/" + }, + { + "@id": "modules/local/" + }, + { + "@id": "modules/nf-core/" + }, + { + "@id": "workflows/" + }, + { + "@id": "subworkflows/" + }, + { + "@id": "nextflow.config" + }, + { + "@id": "README.md" + }, + { + "@id": "nextflow_schema.json" + }, + { + "@id": "CHANGELOG.md" + }, + { + "@id": "LICENSE" + }, + { + "@id": "CITATIONS.md" + }, + { + "@id": "modules.json" + }, + { + "@id": "docs/usage.md" + }, + { + "@id": "docs/output.md" + }, + { + "@id": ".nf-core.yml" + }, + { + "@id": ".pre-commit-config.yaml" + }, + { + "@id": ".prettierignore" + } + ], + "isBasedOn": "https://github.com/qbic-pipelines/vcftomat", + "license": "MIT", + "mainEntity": { + "@id": "main.nf" + }, + "mentions": [ + { + "@id": "#52a401c6-dec9-4dfe-92eb-d33149004223" + } + ], + "name": "qbic-pipelines/vcftomat" }, { "@id": "ro-crate-metadata.json", @@ -44,14 +127,22 @@ "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], "creator": [ { - "@id": "#" + "@id": "https://orcid.org/0000-0003-1387-0251" + }, + { + "@id": "#45968370+famosab@users.noreply.github.com" } ], "dateCreated": "", - "dateModified": "2024-12-16T14:54:00Z", + "dateModified": "2025-01-08T13:21:55Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": ["nf-core", "nextflow"], "license": ["MIT"], + "maintainer": [ + { + "@id": "https://orcid.org/0000-0003-1387-0251" + } + ], "name": ["qbic-pipelines/vcftomat"], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" @@ -59,8 +150,8 @@ "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": ["https://github.com/qbic-pipelines/vcftomat", "https://nf-co.re/qbic-pipelines/vcftomat/dev/"], - "version": 
["1.0.0dev"] + "url": ["https://github.com/qbic-pipelines/vcftomat", "https://nf-co.re/qbic-pipelines/vcftomat/1.1.0/"], + "version": ["1.1.0"] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -75,11 +166,11 @@ "version": "!>=24.04.2" }, { - "@id": "#d5224f03-284f-41f1-93d1-e813bdaeb009", + "@id": "#52a401c6-dec9-4dfe-92eb-d33149004223", "@type": "TestSuite", "instance": [ { - "@id": "#812ae302-1686-4339-815d-ec28877e71f6" + "@id": "#2a6b4982-255a-44b9-9ff1-e0690f6c6e9d" } ], "mainEntity": { @@ -88,7 +179,7 @@ "name": "Test suite for qbic-pipelines/vcftomat" }, { - "@id": "#812ae302-1686-4339-815d-ec28877e71f6", + "@id": "#2a6b4982-255a-44b9-9ff1-e0690f6c6e9d", "@type": "TestInstance", "name": "GitHub Actions workflow for testing qbic-pipelines/vcftomat", "resource": "repos/qbic-pipelines/vcftomat/actions/workflows/ci.yml", @@ -110,6 +201,11 @@ "@type": "Dataset", "description": "Additional files" }, + { + "@id": "bin/", + "@type": "Dataset", + "description": "Scripts that must be callable from a pipeline process" + }, { "@id": "conf/", "@type": "Dataset", @@ -120,11 +216,21 @@ "@type": "Dataset", "description": "Markdown files for documenting the pipeline" }, + { + "@id": "docs/images/", + "@type": "Dataset", + "description": "Images for the documentation files" + }, { "@id": "modules/", "@type": "Dataset", "description": "Modules used by the pipeline" }, + { + "@id": "modules/local/", + "@type": "Dataset", + "description": "Pipeline-specific modules" + }, { "@id": "modules/nf-core/", "@type": "Dataset", @@ -205,6 +311,18 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" + }, + { + "@id": "https://orcid.org/0000-0003-1387-0251", + "@type": "Person", + "email": "45968370+famosab@users.noreply.github.com", + "name": "Famke B\u00e4uerle" + }, + { + "@id": "#45968370+famosab@users.noreply.github.com", + "@type": "Person", + "email": "45968370+famosab@users.noreply.github.com", + "name": "Famke Ba\u0308uerle" } ] } diff 
--git a/tests/.nftignore b/tests/.nftignore index e69de29..4b801d5 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -0,0 +1,19 @@ +**/*.vcf.{gz,gz.tbi,gz.csi} +tabix/*.vcf.{gz,gz.tbi,gz.csi} +gatk/*.vcf.{gz,gz.tbi} +bcftools/concat/*.vcf.{gz,gz.tbi} +bcftools/reheader/*.vcf.{gz,gz.tbi} +bcftools/merge/*.vcf.{gz,gz.tbi} +pipeline_info/*.{html,json,txt} +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_general_stats.txt +multiqc/multiqc_data/multiqc_picard_dups.txt +multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/picard_deduplication.txt +multiqc/multiqc_data/vcftools_tstv_by_qual.txt +multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html +**/toindex_concat_merge.csv +pipeline_info/vcftomat_software_mqc_versions.yml diff --git a/tests/default.nf.test b/tests/default.nf.test index 37c0549..e13a6d8 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -18,17 +18,21 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // vcf_files: All files in ${params.outdir} + def vcf_files = getAllFilesFromDir(params.outdir, include: ['**/*.vcf.gz', '**/*.vcf']) assertAll( { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/pipeline_software_mqc_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/vcftomat_software_mqc_versions.yml"), // All stable path name, with a relative path stable_name, // All files with 
stable contents - stable_path + stable_path, + // All vcf files + vcf_files.collect{ file -> file.name + ":md5," + path(file.path).vcf.variantsMD5 } ).match() } ) } @@ -51,17 +55,21 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // vcf_files: All files in ${params.outdir} + def vcf_files = getAllFilesFromDir(params.outdir, include: ['**/*.vcf.gz', '**/*.vcf']) assertAll( { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/pipeline_software_mqc_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/vcftomat_software_mqc_versions.yml"), // All stable path name, with a relative path stable_name, // All files with stable contents - stable_path + stable_path, + // All vcf files + vcf_files ).match() } ) } diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 53659f6..ba51107 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,8 +1,11 @@ { "Params: default - stub": { "content": [ - 3, + 4, { + "BCFTOOLS_REHEADER": { + "bcftools": 1.2 + }, "TABIX_TABIX": { "tabix": 1.2 }, @@ -10,16 +13,21 @@ "vcf2counts.R": "1.0.0" }, "Workflow": { - "qbic-pipelines/vcftomat": "v1.0.0" + "qbic-pipelines/vcftomat": "v1.1.0" } }, [ + "bcftools", + "bcftools/reheader", + "bcftools/reheader/chr22.reheader.vcf.gz", + "bcftools/reheader/chr22.reheader.vcf.gz.tbi", + "bcftools/reheader/versions.yml", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_plots", "multiqc/multiqc_report.html", "pipeline_info", - 
"pipeline_info/pipeline_software_mqc_versions.yml", + "pipeline_info/vcftomat_software_mqc_versions.yml", "tabix", "tabix/NA12878.chr22.1X.vcf.gz.csi", "tabix/NA12878.chr22.1X.vcf.gz.tbi", @@ -27,24 +35,26 @@ "vcf2mat/chr22.csv" ], [ - "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", - "params_2024-12-11_11-15-12.json:md5,968166186e8c46974d7165db1f4e84b0", - "pipeline_software_mqc_versions.yml:md5,4a3c08d739de008e8cfee04b52a64b05", - "NA12878.chr22.1X.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e", - "NA12878.chr22.1X.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,c030f20e1b02bd49b0a4ec369f5b4429", "chr22.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + "chr22.reheader.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T11:15:19.55845" + "timestamp": "2024-12-20T12:10:59.547851" }, "Params: default": { "content": [ - 3, + 4, { + "BCFTOOLS_REHEADER": { + "bcftools": 1.2 + }, "TABIX_TABIX": { "tabix": 1.2 }, @@ -52,10 +62,15 @@ "vcf2counts.R": "1.0.0" }, "Workflow": { - "qbic-pipelines/vcftomat": "v1.0.0" + "qbic-pipelines/vcftomat": "v1.1.0" } }, [ + "bcftools", + "bcftools/reheader", + "bcftools/reheader/chr22.reheader.vcf.gz", + "bcftools/reheader/chr22.reheader.vcf.gz.tbi", + "bcftools/reheader/versions.yml", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/multiqc.log", @@ -65,29 +80,25 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/pipeline_software_mqc_versions.yml", + "pipeline_info/vcftomat_software_mqc_versions.yml", "tabix", "tabix/NA12878.chr22.1X.vcf.gz.tbi", "vcf2mat", "vcf2mat/chr22.csv" ], [ - "multiqc.log:md5,5b8c6067298a490674b646971f716100", + "versions.yml:md5,c030f20e1b02bd49b0a4ec369f5b4429", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,949e3064280a0e919782c85d64fda606", - 
"multiqc_software_versions.txt:md5,5b52328f6144cbad9faab2aebb879f2a", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,b7e65cff7abafa2795efb34b52f5a0d7", - "params_2024-12-11_11-14-49.json:md5,b088670a94ed20471600d3ac5928ba66", - "pipeline_software_mqc_versions.yml:md5,4a3c08d739de008e8cfee04b52a64b05", - "NA12878.chr22.1X.vcf.gz.tbi:md5,3ca6e94f86c548be87dd5fcc391a2525", - "chr22.csv:md5,765ece7265a0b6abac7df8d8632a1500" + "chr22.csv:md5,06c743150691cd38f4a2caf65ca6c115" + ], + [ + "chr22.reheader.vcf.gz:md5,11558863c409ca2d6278e699cd5fde1a" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T11:15:08.98074" + "timestamp": "2024-12-20T12:10:25.903469" } } diff --git a/tests/full.nf.test b/tests/full.nf.test index 7f1557a..c8319b4 100644 --- a/tests/full.nf.test +++ b/tests/full.nf.test @@ -18,17 +18,25 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // vcf_files: All files in ${params.outdir} + def vcf_files = getAllFilesFromDir(params.outdir, + include: ['**/*.vcf.gz', '**/*.vcf'], + ignore: ['**/concat.concat.vcf.gz', + '**/toindex_concat_merge.merge.vcf.gz', + '**/concat.reheader.vcf.gz']) assertAll( { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/pipeline_software_mqc_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/vcftomat_software_mqc_versions.yml"), // All stable path name, with a relative path stable_name, // All files with stable contents - stable_path + 
stable_path, + // All vcf files + vcf_files.collect{ file -> file.name + ":md5," + path(file.path).vcf.variantsMD5 } ).match() } ) } @@ -51,17 +59,21 @@ nextflow_pipeline { def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) // stable_path: All files in ${params.outdir}/ with stable content def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + // vcf_files: All files in ${params.outdir} + def vcf_files = getAllFilesFromDir(params.outdir, include: ['**/*.vcf.gz', '**/*.vcf']) assertAll( { assert workflow.success}, { assert snapshot( // Number of successful tasks workflow.trace.succeeded().size(), // pipeline versions.yml file for multiqc from which Nextflow version is removed because we tests pipelines on multiple Nextflow versions - removeNextflowVersion("$outputDir/pipeline_info/pipeline_software_mqc_versions.yml"), + removeNextflowVersion("$outputDir/pipeline_info/vcftomat_software_mqc_versions.yml"), // All stable path name, with a relative path stable_name, // All files with stable contents - stable_path + stable_path, + // All vcf files + vcf_files ).match() } ) } diff --git a/tests/full.nf.test.snap b/tests/full.nf.test.snap index 3761602..bda1ed8 100644 --- a/tests/full.nf.test.snap +++ b/tests/full.nf.test.snap @@ -1,11 +1,17 @@ { "Params: default | input full-size": { "content": [ - 15, + 23, { + "BCFTOOLS_CONCAT": { + "bcftools": 1.2 + }, "BCFTOOLS_MERGE": { "bcftools": 1.2 }, + "BCFTOOLS_REHEADER": { + "bcftools": 1.2 + }, "GATK4_GENOTYPEGVCFS": { "gatk4": "4.6.1.0" }, @@ -16,13 +22,35 @@ "vcf2counts.R": "1.0.0" }, "Workflow": { - "qbic-pipelines/vcftomat": "v1.0.0" + "qbic-pipelines/vcftomat": "v1.1.0" } }, [ "bcftools", - "bcftools/indexed_merge.merged.vcf", - "bcftools/toindex_gvcf_merge.merged.vcf", + "bcftools/concat", + "bcftools/concat/concat.concat.vcf.gz", + "bcftools/concat/concat.concat.vcf.gz.tbi", + "bcftools/concat/versions.yml", + 
"bcftools/merge", + "bcftools/merge/indexed_merge.merge.vcf.gz", + "bcftools/merge/indexed_merge.merge.vcf.gz.tbi", + "bcftools/merge/toindex_concat_merge.merge.vcf.gz", + "bcftools/merge/toindex_concat_merge.merge.vcf.gz.tbi", + "bcftools/merge/toindex_gvcf_merge.merge.vcf.gz", + "bcftools/merge/toindex_gvcf_merge.merge.vcf.gz.tbi", + "bcftools/merge/versions.yml", + "bcftools/reheader", + "bcftools/reheader/callerA.reheader.vcf.gz", + "bcftools/reheader/callerA.reheader.vcf.gz.tbi", + "bcftools/reheader/callerB.reheader.vcf.gz", + "bcftools/reheader/callerB.reheader.vcf.gz.tbi", + "bcftools/reheader/concat.reheader.vcf.gz", + "bcftools/reheader/concat.reheader.vcf.gz.tbi", + "bcftools/reheader/empty.reheader.vcf.gz", + "bcftools/reheader/empty.reheader.vcf.gz.tbi", + "bcftools/reheader/merge.reheader.vcf.gz", + "bcftools/reheader/merge.reheader.vcf.gz.tbi", + "bcftools/reheader/versions.yml", "gatk4", "gatk4/test.vcf.gz", "gatk4/test.vcf.gz.tbi", @@ -37,56 +65,58 @@ "multiqc/multiqc_data/multiqc_sources.txt", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/pipeline_software_mqc_versions.yml", + "pipeline_info/vcftomat_software_mqc_versions.yml", "tabix", "tabix/NA12878.chr22.1X.vcf.gz.tbi", + "tabix/NA12878_GIAB.chr22.vcf.gz.tbi", + "tabix/NA24385_sv.vcf.gz.tbi", "tabix/test.genome.g.vcf.gz.tbi", "tabix/test2.genome.vcf.gz.tbi", "vcf2mat", "vcf2mat/empty.csv", "vcf2mat/indexed_merge.csv", - "vcf2mat/toindex_gvcf_merge.csv", - "vcf2mat/toindex_gvcf_nomerge.csv", - "vcf2mat/toindex_nomerge.csv" + "vcf2mat/toindex_concat_merge.csv", + "vcf2mat/toindex_gvcf_merge.csv" ], [ - "indexed_merge.merged.vcf:md5,6e7b63a467a7490760a772503dc45e2f", - "toindex_gvcf_merge.merged.vcf:md5,89b6ab10b8c019287cf987f9eaaa6490", - "test.vcf.gz:md5,ab2e3acf677384a527d6fd15279e1308", - "test.vcf.gz.tbi:md5,0613c73f14e6004eadbfc526e0fc65a0", - "test2.vcf.gz:md5,ef3d1e3200d95bc2a4b622e74b6c7dad", - "test2.vcf.gz.tbi:md5,fea49a9f40baf32f545a7751e83349f5", - 
"multiqc.log:md5,a4724d0703c65afa594812ffbbe2cee6", + "versions.yml:md5,8e4cc1068ff64a583811f3e3bd8a4771", + "versions.yml:md5,0567f354f7d4ddac23b392f94e760ea2", + "versions.yml:md5,c030f20e1b02bd49b0a4ec369f5b4429", "multiqc_citations.txt:md5,4c806e63a283ec1b7e78cdae3a923d4f", - "multiqc_data.json:md5,2458fdbc4e37592ee1369405a45fff8f", - "multiqc_software_versions.txt:md5,7cd6280ad9aab78724d0729c9a3e22ef", - "multiqc_sources.txt:md5,d2a044df39ce3c6abe5cdc2d67473490", - "multiqc_report.html:md5,7a88b785de013b9be1e80d10599f6623", - "params_2024-12-11_13-01-24.json:md5,b230863e6e2b61cce98fb66078d01f59", - "pipeline_software_mqc_versions.yml:md5,1b2f9328e5885175a2b6f3a5cd03f323", - "NA12878.chr22.1X.vcf.gz.tbi:md5,3ca6e94f86c548be87dd5fcc391a2525", - "test.genome.g.vcf.gz.tbi:md5,e7611a7f7d2eb07d5adeb4d4569578ab", - "test2.genome.vcf.gz.tbi:md5,6fe54f8e8d38cbbc0046f4427b21e11a", - "empty.csv:md5,77f0670b6b6ac7830e02d4c0222075c5", - "indexed_merge.csv:md5,c79bcada2daab2953452c076920e1c00", - "toindex_gvcf_merge.csv:md5,22a6203ce9bdf4763f30c54c6e4d2b77", - "toindex_gvcf_nomerge.csv:md5,317e9254f321fa886a990221d261e3de", - "toindex_nomerge.csv:md5,765ece7265a0b6abac7df8d8632a1500" + "empty.csv:md5,1bd32757621943a049881f0f99e4a2c4", + "indexed_merge.csv:md5,b59aae2be818fd1ddc1fb3b9e8e8e89b", + "toindex_gvcf_merge.csv:md5,bb0cc37d7f55f611a8d0f11c4d80dece" + ], + [ + "indexed_merge.merge.vcf.gz:md5,5089e377e380c78d3ff29a8d36db14dd", + "toindex_gvcf_merge.merge.vcf.gz:md5,6f6b4664d06ccb50ceead526cec7aa0d", + "callerA.reheader.vcf.gz:md5,16b9e984ed9af8f4d147900f9b8e0bee", + "callerB.reheader.vcf.gz:md5,9faa7fb1a37a48c4444b498e39ca4c01", + "empty.reheader.vcf.gz:md5,d41d8cd98f00b204e9800998ecf8427e", + "merge.reheader.vcf.gz:md5,9faa7fb1a37a48c4444b498e39ca4c01", + "test.vcf.gz:md5,1ab95fbc5ec55b208f3001572bec54fa", + "test2.vcf.gz:md5,4046f52f4ea6616b7295c7f16b987710" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T13:02:46.414504" + 
"timestamp": "2024-12-20T12:11:20.624762" }, "Params: default | input full-size - stub": { "content": [ - 15, + 23, { + "BCFTOOLS_CONCAT": { + "bcftools": 1.2 + }, "BCFTOOLS_MERGE": { "bcftools": 1.2 }, + "BCFTOOLS_REHEADER": { + "bcftools": 1.2 + }, "GATK4_GENOTYPEGVCFS": { "gatk4": "4.6.1.0" }, @@ -97,13 +127,35 @@ "vcf2counts.R": "1.0.0" }, "Workflow": { - "qbic-pipelines/vcftomat": "v1.0.0" + "qbic-pipelines/vcftomat": "v1.1.0" } }, [ "bcftools", - "bcftools/indexed_merge.merged.vcf", - "bcftools/toindex_gvcf_merge.merged.vcf", + "bcftools/concat", + "bcftools/concat/concat.concat.vcf.gz", + "bcftools/concat/concat.concat.vcf.gz.tbi", + "bcftools/concat/versions.yml", + "bcftools/merge", + "bcftools/merge/indexed_merge.merge.vcf.gz", + "bcftools/merge/indexed_merge.merge.vcf.gz.tbi", + "bcftools/merge/toindex_concat_merge.merge.vcf.gz", + "bcftools/merge/toindex_concat_merge.merge.vcf.gz.tbi", + "bcftools/merge/toindex_gvcf_merge.merge.vcf.gz", + "bcftools/merge/toindex_gvcf_merge.merge.vcf.gz.tbi", + "bcftools/merge/versions.yml", + "bcftools/reheader", + "bcftools/reheader/callerA.reheader.vcf.gz", + "bcftools/reheader/callerA.reheader.vcf.gz.tbi", + "bcftools/reheader/callerB.reheader.vcf.gz", + "bcftools/reheader/callerB.reheader.vcf.gz.tbi", + "bcftools/reheader/concat.reheader.vcf.gz", + "bcftools/reheader/concat.reheader.vcf.gz.tbi", + "bcftools/reheader/empty.reheader.vcf.gz", + "bcftools/reheader/empty.reheader.vcf.gz.tbi", + "bcftools/reheader/merge.reheader.vcf.gz", + "bcftools/reheader/merge.reheader.vcf.gz.tbi", + "bcftools/reheader/versions.yml", "gatk4", "gatk4/test.vcf.gz", "gatk4/test.vcf.gz.tbi", @@ -114,10 +166,14 @@ "multiqc/multiqc_plots", "multiqc/multiqc_report.html", "pipeline_info", - "pipeline_info/pipeline_software_mqc_versions.yml", + "pipeline_info/vcftomat_software_mqc_versions.yml", "tabix", "tabix/NA12878.chr22.1X.vcf.gz.csi", "tabix/NA12878.chr22.1X.vcf.gz.tbi", + "tabix/NA12878_GIAB.chr22.vcf.gz.csi", + 
"tabix/NA12878_GIAB.chr22.vcf.gz.tbi", + "tabix/NA24385_sv.vcf.gz.csi", + "tabix/NA24385_sv.vcf.gz.tbi", "tabix/test.genome.g.vcf.gz.csi", "tabix/test.genome.g.vcf.gz.tbi", "tabix/test2.genome.vcf.gz.csi", @@ -125,37 +181,35 @@ "vcf2mat", "vcf2mat/empty.csv", "vcf2mat/indexed_merge.csv", - "vcf2mat/toindex_gvcf_merge.csv", - "vcf2mat/toindex_gvcf_nomerge.csv", - "vcf2mat/toindex_nomerge.csv" + "vcf2mat/toindex_concat_merge.csv", + "vcf2mat/toindex_gvcf_merge.csv" ], [ - "indexed_merge.merged.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", - "toindex_gvcf_merge.merged.vcf:md5,d41d8cd98f00b204e9800998ecf8427e", - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", - "test2.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", - "multiqc_report.html:md5,d41d8cd98f00b204e9800998ecf8427e", - "params_2024-12-11_13-02-50.json:md5,9d3347ebf0840fd1553b6a3cf4bf5061", - "pipeline_software_mqc_versions.yml:md5,1b2f9328e5885175a2b6f3a5cd03f323", - "NA12878.chr22.1X.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e", - "NA12878.chr22.1X.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", - "test.genome.g.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e", - "test.genome.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.genome.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e", - "test2.genome.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "versions.yml:md5,8e4cc1068ff64a583811f3e3bd8a4771", + "versions.yml:md5,0567f354f7d4ddac23b392f94e760ea2", + "versions.yml:md5,c030f20e1b02bd49b0a4ec369f5b4429", "empty.csv:md5,d41d8cd98f00b204e9800998ecf8427e", "indexed_merge.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "toindex_gvcf_merge.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "toindex_gvcf_nomerge.csv:md5,d41d8cd98f00b204e9800998ecf8427e", - "toindex_nomerge.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + "toindex_gvcf_merge.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + [ + 
"concat.concat.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "indexed_merge.merge.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "toindex_concat_merge.merge.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "toindex_gvcf_merge.merge.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "callerA.reheader.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "callerB.reheader.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "concat.reheader.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "empty.reheader.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "merge.reheader.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test2.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "meta": { "nf-test": "0.9.2", "nextflow": "24.10.2" }, - "timestamp": "2024-12-11T13:03:31.768336" + "timestamp": "2024-12-20T12:12:11.055567" } } diff --git a/tests/input-full.csv b/tests/input-full.csv index 212e1f4..d525459 100644 --- a/tests/input-full.csv +++ b/tests/input-full.csv @@ -1,8 +1,9 @@ -sample,gvcf,vcf_path,vcf_index_path -toindex_nomerge,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz, -toindex_gvcf_nomerge,true,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.g.vcf.gz, -indexed_merge,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz.tbi -indexed_merge,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi 
-toindex_gvcf_merge,true,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.g.vcf.gz, -toindex_gvcf_merge,true,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz, -empty,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz.tbi +sample,label,gvcf,vcf_path,vcf_index_path +indexed_merge,callerA,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878_chrM.vcf.gz.tbi +indexed_merge,callerB,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz.tbi +toindex_gvcf_merge,callerA,true,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/gvcf/test.genome.g.vcf.gz, +toindex_gvcf_merge,callerB,true,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz, +empty,empty,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/empty.vcf.gz.tbi +toindex_concat_merge,concat,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878_GIAB.chr22.vcf.gz, 
+toindex_concat_merge,concat,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz, +toindex_concat_merge,merge,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA24385_sv.vcf.gz, diff --git a/tests/input.csv b/tests/input.csv index c81c040..d97d5ea 100644 --- a/tests/input.csv +++ b/tests/input.csv @@ -1,2 +1,2 @@ -sample,gvcf,vcf_path,vcf_index_path -chr22,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz, +sample,label,gvcf,vcf_path,vcf_index_path +chr22,chr22,false,https://github.com/nf-core/test-datasets/raw/refs/heads/modules/data/genomics/homo_sapiens/illumina/vcf/NA12878.chr22.1X.vcf.gz, diff --git a/workflows/vcftomat.nf b/workflows/vcftomat.nf index 1191aef..35f0b24 100644 --- a/workflows/vcftomat.nf +++ b/workflows/vcftomat.nf @@ -5,6 +5,8 @@ */ include { MULTIQC } from '../modules/nf-core/multiqc/main' include { GATK4_GENOTYPEGVCFS } from '../modules/nf-core/gatk4/genotypegvcfs/main' +include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_REHEADER } from '../modules/nf-core/bcftools/reheader/main' include { BCFTOOLS_MERGE } from '../modules/nf-core/bcftools/merge/main' include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix/main' include { VCF2MAT } from '../modules/local/vcf2mat/main' @@ -34,19 +36,28 @@ workflow VCFTOMAT { // // add index to non-indexed VCFs // - (ch_has_index, ch_has_no_index) = ch_samplesheet.branch{ - has_index: !it[0].to_index - to_index: it[0].to_index - } - - // Remove empty index [] from channel = it[2] and add file name for joining - input_to_index = ch_has_no_index.map{ it -> [ it[0] + [name:it[1][0].baseName], it[1] ] } + (ch_has_index, ch_has_no_index) = ch_samplesheet + .map{ it -> + def name = it[1][0].baseName + name = name + .replaceFirst(/\.g\.vcf$/, "") 
+ .replaceFirst(/\.genome\.vcf$/, "") + .replaceFirst(/\.genome\.g\.vcf$/, "") + .replaceFirst(/\.g$/, "") + .replaceFirst(/\.genome$/, "") + .replaceFirst(/\.vcf$/, "") + [ it[0] + [ name:name ], it[1] ] + } + .branch{ + has_index: !it[0].to_index + to_index: it[0].to_index + } - TABIX_TABIX( input_to_index ) + TABIX_TABIX( ch_has_no_index ) ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) - ch_indexed = input_to_index.join( + ch_indexed = ch_has_no_index.join( TABIX_TABIX.out.tbi .map{ it -> [ it[0], [it[1]] ] } ).map { meta, vcf, tbi -> [ meta, [ vcf[0], tbi[0] ] ] } @@ -80,12 +91,63 @@ workflow VCFTOMAT { ch_versions = ch_versions.mix(GATK4_GENOTYPEGVCFS.out.versions) // - // Merge multiple VCFs per sample with BCFTOOLS_MERGE + // Concatenate converted VCFs if the entries for "id" and "label" are the same + // + (ch_single_vcf, ch_multiple_vcf) = ch_vcf + .map { meta, files -> + // Assuming files is a list of all VCF and TBI files + def vcfs = files.findAll { it.name.endsWith('.vcf.gz') } + def tbis = files.findAll { it.name.endsWith('.vcf.gz.tbi') } + [ [meta.id, meta.label], meta, vcfs, tbis] + } + .groupTuple(by: 0) + .map { id_label, metas, vcfs, tbis -> + def meta = metas[0] + def vcf_count = vcfs.flatten().size() + meta.single_vcf = (vcf_count == 1) + [meta, vcfs.flatten(), tbis.flatten()] + }.branch { + single: it[0].single_vcf + multiple: !it[0].single_vcf + } + + BCFTOOLS_CONCAT( ch_multiple_vcf ) + + ch_vcf_index = BCFTOOLS_CONCAT.out.vcf + .join(BCFTOOLS_CONCAT.out.tbi) + + ch_vcf_concat = ch_single_vcf.mix(ch_vcf_index) + .map { meta, vcf, tbi + -> [ meta.findAll { it.key != 'name' }, [ vcf, tbi ] ] } + + ch_versions = ch_versions.mix(BCFTOOLS_CONCAT.out.versions) + + if (params.rename) { + // + // Rename samples in vcf with the label + // + BCFTOOLS_REHEADER( + ch_vcf_concat.map{ it -> [ it[0], it[1][0], [], [] ] }, + [[],[]] + ) + + ch_vcf_index_rh = BCFTOOLS_REHEADER.out.vcf + .join(BCFTOOLS_REHEADER.out.index) + .map { meta, vcf, 
tbi -> [ meta, [ vcf, tbi ] ] } + + ch_versions = ch_versions.mix(BCFTOOLS_REHEADER.out.versions) + } else { + ch_vcf_index_rh = ch_vcf_concat + } + + + // + // Merge multiple VCFs per sample (patient) with BCFTOOLS_MERGE // // Bring all vcfs from one sample into a channel // Branch based on the number of VCFs per sample - (ch_single_vcf, ch_multiple_vcf) = ch_vcf + (ch_single_id, ch_multiple_id) = ch_vcf_index_rh .map { meta, files -> // Assuming files is a list of all VCF and TBI files def vcfs = files.findAll { it.name.endsWith('.vcf.gz') } @@ -96,23 +158,23 @@ workflow VCFTOMAT { .map { id, metas, vcfs, tbis -> def meta = metas[0] // Take the first meta, they should all be the same for a given ID def vcf_count = vcfs.flatten().size() - meta.single_vcf = (vcf_count == 1) + meta.single_id = (vcf_count == 1) [meta, vcfs.flatten(), tbis.flatten()] }.branch { - single: it[0].single_vcf - multiple: !it[0].single_vcf + single: it[0].single_id + multiple: !it[0].single_id } // Run BCFTOOLS_MERGE only on samples with multiple VCFs BCFTOOLS_MERGE( - ch_multiple_vcf, + ch_multiple_id, [[],[]], // fasta reference only needed for gvcf [[],[]], // fasta.fai reference only needed for gvcf [[],[]] // bed ) // Merge the results back into a single channel - ch_merged_vcfs = ch_single_vcf.mix(BCFTOOLS_MERGE.out.vcf) + ch_merged_vcfs = ch_single_id.mix(BCFTOOLS_MERGE.out.vcf) ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions)