diff --git a/modules.json b/modules.json index 5fc3c89c..a949fdda 100644 --- a/modules.json +++ b/modules.json @@ -246,7 +246,7 @@ }, "svdb/merge": { "branch": "master", - "git_sha": "847683c1bb3a94d846e18916f14c2bc60a447041", + "git_sha": "b80931c5be241910c62795bbf70534df18b3905f", "installed_by": ["modules"] }, "svdb/query": { diff --git a/modules/nf-core/svdb/merge/main.nf b/modules/nf-core/svdb/merge/main.nf index afc34bb4..d28f2a97 100644 --- a/modules/nf-core/svdb/merge/main.nf +++ b/modules/nf-core/svdb/merge/main.nf @@ -8,11 +8,12 @@ process SVDB_MERGE { input: tuple val(meta), path(vcfs) - val (priority) + val(priority) + val(sort_inputs) output: tuple val(meta), path("*.vcf.gz"), emit: vcf - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,15 +21,37 @@ process SVDB_MERGE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input = "${vcfs.join(" ")}" - def prio = "" + + // Ensure priority list matches the number of VCFs if priority is provided + if (priority && vcfs.collect().size() != priority.collect().size()) { + error "If priority is used, one tag per VCF is needed" + } + + if (sort_inputs && vcfs.collect().size() > 1) { + if (priority) { + // make vcf-priority pairs and sort on VCF name, so priority is also sorted the same + def pairs = vcfs.indices.collect { [vcfs[it], priority[it]] } + pairs = pairs.sort { a, b -> a[0].name <=> b[0].name } + vcfs = pairs.collect { it[0] } + priority = pairs.collect { it[1] } + } else { + // if there's no priority input just sort the vcfs by name + vcfs = vcfs.sort { it.name } + } + } + + // If there's only one input VCF the code above is not executed, and that VCF becomes the input + input = vcfs + + def prio = "" if(priority) { prio = "--priority ${priority.join(',')}" input = "" - for (int index = 0; index < vcfs.size(); index++) { - input += " ${vcfs[index]}:${priority[index]}" + for (int index 
= 0; index < vcfs.collect().size(); index++) { + input += "${vcfs[index]}:${priority[index]} " } } + """ svdb \\ --merge \\ diff --git a/modules/nf-core/svdb/merge/meta.yml b/modules/nf-core/svdb/merge/meta.yml index 7dc7c675..d6cc1758 100644 --- a/modules/nf-core/svdb/merge/meta.yml +++ b/modules/nf-core/svdb/merge/meta.yml @@ -19,12 +19,22 @@ input: e.g. [ id:'test' ] - vcfs: type: list - description: Two or more VCF files. Order of files should correspond to the - order of tags used for priority. + description: | + One or more VCF files. The order and number of files should correspond to + the order and number of tags in the `priority` input channel. pattern: "*.{vcf,vcf.gz}" - - priority: type: list - description: prioritise the input vcf files according to this list, e.g ['tiddit','cnvnator'] + description: | + Prioritize the input VCF files according to this list, + e.g. ['tiddit','cnvnator']. The order and number of tags should correspond to + the order and number of VCFs in the `vcfs` input channel. + - - sort_inputs: + type: boolean + description: | + Should the input files be sorted by name. The priority tag will be sorted + together with its corresponding VCF file. 
+ output: - vcf: - meta: diff --git a/modules/nf-core/svdb/merge/tests/main.nf.test b/modules/nf-core/svdb/merge/tests/main.nf.test index b0743a69..0dddffa3 100644 --- a/modules/nf-core/svdb/merge/tests/main.nf.test +++ b/modules/nf-core/svdb/merge/tests/main.nf.test @@ -2,23 +2,86 @@ nextflow_process { name "Test Process SVDB_MERGE" script "modules/nf-core/svdb/merge/main.nf" + config "./nextflow.config" process "SVDB_MERGE" tag "modules" tag "modules_nfcore" tag "svdb" tag "svdb/merge" - test("test_svdb_merge") { + test("1 sample, [], []") { when { process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) ] + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] ]) - input[1] = [ 'tiddit', 'cnvnator'] + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test.vcf") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } + ) + } + } + + test("1 sample, [], true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = [] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test.vcf") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } + 
) + } + } + + test("1 sample, ['tiddit'], []") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = ['tiddit'] + input[2] = [] """ } } @@ -26,23 +89,122 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.1") } + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test.vcf:tiddit") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } ) } + } + + test("1 sample, ['tiddit'], true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = ['tiddit'] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test.vcf:tiddit") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } + ) + } + } + test("2 samples, [], []") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = [] + input[2] = [] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test2.vcf test.vcf") }, // SVDB command line + { assert 
snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } + ) + } } - test("test_svdb_merge_noprio") { + test("2 samples, [], true") { when { process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) ] + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] ]) input[1] = [] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test.vcf test2.vcf") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } + ) + } + } + + test("2 samples, ['tiddit', 'cnvnator'], []") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = ['tiddit', 'cnvnator'] + input[2] = [] """ } } @@ -50,13 +212,48 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert path(process.out.vcf.get(0).get(1)).linesGzip.contains("##fileformat=VCFv4.1") } + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test2.vcf:tiddit test.vcf:cnvnator") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + 
path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } ) } + } + + test("2 samples, ['tiddit', 'cnvnator'], true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = ['tiddit', 'cnvnator'] + input[2] = true + """ + } + } + then { + assertAll ( + { assert process.success }, + { assert path(process.out.vcf.get(0).get(1)).linesGzip[2].contains("--vcf test.vcf:cnvnator test2.vcf:tiddit") }, // SVDB command line + { assert snapshot( + path(process.out.vcf.get(0).get(1)).vcf.summary, + path(process.out.vcf.get(0).get(1)).vcf.variantsMD5, + process.out.versions + ).match() } + ) + } } - test("test_svdb_merge - stub") { + test("2 samples, [], [] - stub") { options "-stub" @@ -65,10 +262,13 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) ] + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] ]) - input[1] = [ 'tiddit', 'cnvnator'] + input[1] = [] + input[2] = [] """ } } @@ -79,10 +279,9 @@ nextflow_process { { assert snapshot(process.out).match() } ) } - } - test("test_svdb_merge_noprio - stub") { + test("2 samples, [], true - stub") { options "-stub" @@ -91,10 +290,41 @@ nextflow_process { """ input[0] = Channel.of([ [ id:'test' ], // meta map - [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), - 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true) ] + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] ]) input[1] = [] + input[2] = true + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("2 samples, ['tiddit', 'cnvnator'], [] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = ['tiddit', 'cnvnator'] + input[2] = [] """ } } @@ -105,7 +335,34 @@ nextflow_process { { assert snapshot(process.out).match() } ) } + } + + test("2 samples, ['tiddit', 'cnvnator'], true - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + ]) + input[1] = ['tiddit', 'cnvnator'] + input[2] = true + """ + } + } + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } } } diff --git a/modules/nf-core/svdb/merge/tests/main.nf.test.snap b/modules/nf-core/svdb/merge/tests/main.nf.test.snap index e79b2583..a5daf368 100644 --- a/modules/nf-core/svdb/merge/tests/main.nf.test.snap +++ b/modules/nf-core/svdb/merge/tests/main.nf.test.snap @@ -1,5 +1,183 @@ { - "test_svdb_merge - stub": { + "1 sample, [], []": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], 
sampleCount=1, variantCount=9, phased=false, phasedAutodetect=false]", + "60fb4cab2aa891bebef8ffdbd0e41bc3", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:30:08.029708713" + }, + "2 samples, ['tiddit', 'cnvnator'], true - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ], + "vcf": [ + [ + { + "id": "test" + }, + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:32:10.741546957" + }, + "2 samples, ['tiddit', 'cnvnator'], []": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=2, variantCount=9, phased=false, phasedAutodetect=false]", + "254e56e4fc8356d68424828438da66e3", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:31:15.105041662" + }, + "2 samples, [], []": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=2, variantCount=9, phased=false, phasedAutodetect=false]", + "7ad648266e57d405b5b01aaea4613d1c", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:41:54.793936976" + }, + "2 samples, ['tiddit', 'cnvnator'], true": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=2, variantCount=9, phased=false, phasedAutodetect=false]", + "74ed58e115db54f30036bfd68a7dc432", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:31:29.320496992" + }, + "1 sample, 
['tiddit'], []": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=1, variantCount=9, phased=false, phasedAutodetect=false]", + "9dd588cd870672b78192f48ad440b5d", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:30:30.590239659" + }, + "1 sample, [], true": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=1, variantCount=9, phased=false, phasedAutodetect=false]", + "60fb4cab2aa891bebef8ffdbd0e41bc3", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:30:22.670145479" + }, + "1 sample, ['tiddit'], true": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=1, variantCount=9, phased=false, phasedAutodetect=false]", + "9dd588cd870672b78192f48ad440b5d", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:30:40.958649472" + }, + "2 samples, [], true": { + "content": [ + "VcfFile [chromosomes=[MT192765.1], sampleCount=2, variantCount=9, phased=false, phasedAutodetect=false]", + "de0a3b56cdee89e4c9cd4fbb4ad3391d", + [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-17T08:31:01.933044815" + }, + "2 samples, ['tiddit', 'cnvnator'], [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ], + "vcf": [ + [ + { + "id": "test" + }, + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,772f39343052d54d9bcb21d4892da203" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": 
"2024-10-17T08:31:59.830493054" + }, + "2 samples, [], [] - stub": { "content": [ { "0": [ @@ -7,7 +185,7 @@ { "id": "test" }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ @@ -18,7 +196,7 @@ { "id": "test" }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -30,9 +208,9 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-16T09:00:41.058996433" + "timestamp": "2024-10-17T08:41:07.289491939" }, - "test_svdb_merge_noprio - stub": { + "2 samples, [], true - stub": { "content": [ { "0": [ @@ -40,7 +218,7 @@ { "id": "test" }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "1": [ @@ -51,7 +229,7 @@ { "id": "test" }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + "merged.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], "versions": [ @@ -63,6 +241,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-16T09:00:49.58223306" + "timestamp": "2024-10-17T08:31:48.40721064" } } \ No newline at end of file diff --git a/modules/nf-core/svdb/merge/tests/nextflow.config b/modules/nf-core/svdb/merge/tests/nextflow.config new file mode 100644 index 00000000..25f38031 --- /dev/null +++ b/modules/nf-core/svdb/merge/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'SVDB_MERGE' { + ext.prefix = "merged" + } +} diff --git a/subworkflows/local/call_svs/main.nf b/subworkflows/local/call_svs/main.nf index d176e841..1ea76469 100644 --- a/subworkflows/local/call_svs/main.nf +++ b/subworkflows/local/call_svs/main.nf @@ -83,7 +83,11 @@ workflow CALL_SVS { .set { ch_svdb_merge_in } // Merge the files with new sample names - SVDB_MERGE ( ch_svdb_merge_in, []) + SVDB_MERGE ( + ch_svdb_merge_in, + [], + true + ) ch_versions = ch_versions.mix(SVDB_MERGE.out.versions) TABIX_SVDB_MERGE ( SVDB_MERGE.out.vcf )