From 386302dddeccc78db86f21667d529e074777d645 Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:51:50 +0200 Subject: [PATCH 1/4] add bait regions to deepvariant --- subworkflows/local/call_snv.nf | 2 + subworkflows/local/prepare_references.nf | 76 +++++++++---------- .../variant_calling/call_snv_deepvariant.nf | 15 +++- workflows/raredisease.nf | 1 + 4 files changed, 52 insertions(+), 42 deletions(-) diff --git a/subworkflows/local/call_snv.nf b/subworkflows/local/call_snv.nf index 8060e0cc..ed6bde4b 100644 --- a/subworkflows/local/call_snv.nf +++ b/subworkflows/local/call_snv.nf @@ -30,6 +30,7 @@ workflow CALL_SNV { ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ] ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ] ch_call_interval // channel: [mandatory] [ path(intervals) ] + ch_bait_intervals // channel: [mandatory] [ path(intervals) ] ch_ml_model // channel: [mandatory] [ path(model) ] ch_par_bed // channel: [optional] [ val(meta), path(bed) ] ch_case_info // channel: [mandatory] [ val(case_info) ] @@ -54,6 +55,7 @@ workflow CALL_SNV { ch_genome_bam_bai, ch_genome_fasta, ch_genome_fai, + ch_bait_intervals, ch_par_bed, ch_case_info, ch_foundin_header, diff --git a/subworkflows/local/prepare_references.nf b/subworkflows/local/prepare_references.nf index 4af09860..99b291c9 100644 --- a/subworkflows/local/prepare_references.nf +++ b/subworkflows/local/prepare_references.nf @@ -33,15 +33,15 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul workflow PREPARE_REFERENCES { take: - ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] - ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] - ch_genome_dictionary // channel: [mandatory] [ val(meta), path(fai) ] - ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] - ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] - 
ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] - ch_target_bed // channel: [mandatory for WES] [ path(bed) ] - ch_vcfanno_extra_unprocessed // channel: [mandatory] [ val(meta), path(vcf) ] - ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_genome_dictionary // channel: [mandatory] [ val(meta), path(fai) ] + ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ] + ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ] + ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ] + ch_target_bed // channel: [mandatory for WES] [ path(bed) ] + ch_vcfanno_extra_unprocessed // channel: [mandatory] [ val(meta), path(vcf) ] + ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ] main: ch_versions = Channel.empty() @@ -170,34 +170,34 @@ workflow PREPARE_REFERENCES { ch_versions = ch_versions.mix(RTGTOOLS_FORMAT.out.versions) emit: - genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ] - genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] - genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] - genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] - genome_fai = ch_fai // channel: [ val(meta), path(fai) ] - genome_dict = ch_dict // channel: [ val(meta), path(dict) ] - sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ] - mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ] - mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ] - mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), 
path(index) ] - mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ] - mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ] - mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ] - mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ] - mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ] - mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ] - mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ] - mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ] - mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ] - mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] - - gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] - target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] - vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ] - bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ] - target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] - vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] - versions = ch_versions // channel: [ path(versions.yml) ] + genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ] + genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] + genome_bwameme_index = 
BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ] + genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ] + genome_fai = ch_fai // channel: [ val(meta), path(fai) ] + genome_dict = ch_dict // channel: [ val(meta), path(dict) ] + sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ] + mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ] + mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ] + mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), path(index) ] + mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ] + mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ] + mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ] + mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ] + mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ] + mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ] + mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ] + mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ] + mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ] + mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ] + + gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ] + known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ] + target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ] + vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ] + bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> 
inter}.collect().ifEmpty([[]]) // channel: [ path(intervals) ] + target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ] + vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ] + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/local/variant_calling/call_snv_deepvariant.nf b/subworkflows/local/variant_calling/call_snv_deepvariant.nf index c535a1c4..5db7aafd 100644 --- a/subworkflows/local/variant_calling/call_snv_deepvariant.nf +++ b/subworkflows/local/variant_calling/call_snv_deepvariant.nf @@ -16,6 +16,7 @@ workflow CALL_SNV_DEEPVARIANT { ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] + ch_bait_intervals // channel: [optional; used only for WES] [ path(intervals) ] ch_par_bed // channel: [optional] [ val(meta), path(bed) ] ch_case_info // channel: [mandatory] [ val(case_info) ] ch_foundin_header // channel: [mandatory] [ path(header) ] @@ -24,10 +25,16 @@ workflow CALL_SNV_DEEPVARIANT { main: ch_versions = Channel.empty() - ch_bam_bai.map { meta, bam, bai -> - return [meta, bam, bai, []] - } - .set { ch_deepvar_in } + if (params.analysis_type.equals("wes")) { + ch_bam_bai + .combine (ch_bait_intervals) + .set { ch_deepvar_in } + } else if (params.analysis_type.equals("wgs")) { + ch_bam_bai + .map { meta, bam, bai -> + return [meta, bam, bai, []] } + .set { ch_deepvar_in } + } DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed ) DEEPVARIANT.out.gvcf diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index dcef6a49..8fa10bd6 100644 --- a/workflows/raredisease.nf +++ b/workflows/raredisease.nf @@ -517,6 +517,7 @@ workflow RAREDISEASE { ch_dbsnp, ch_dbsnp_tbi, ch_call_interval, + ch_bait_intervals, ch_ml_model, ch_par_bed, ch_case_info,
From 4b1b05fb51f01ec30a1f3feb430b10db62ee3a5c Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Mon, 14 Oct 2024 16:54:26 +0200 Subject: [PATCH 2/4] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7529bb6e..7fae5456 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- Restrict deepvariant analysis of WES samples to bait regions [#632](https://github.com/nf-core/raredisease/pull/632) - bcftools annotate declaration in annotate CADD subworkflow [#624](https://github.com/nf-core/raredisease/pull/624) - Rhocallviz subworkflow will only be invocated once per sample [#621](https://github.com/nf-core/raredisease/pull/621) - Allow for VEP version 112 to be used and set it to default [#617](https://github.com/nf-core/raredisease/pull/617) From 7e2d68accd5731f19bc6bc8c60ab8b968b425aba Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Tue, 15 Oct 2024 08:40:11 +0200 Subject: [PATCH 3/4] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fae5456..7f082a8c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,7 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- Restrict deepvariant analysis of WES samples to bait regions [#632](https://github.com/nf-core/raredisease/pull/632) +- Restrict deepvariant analysis of WES samples to bait regions [#633](https://github.com/nf-core/raredisease/pull/633) - bcftools annotate declaration in annotate CADD subworkflow [#624](https://github.com/nf-core/raredisease/pull/624) - Rhocallviz subworkflow will only be invocated once per sample [#621](https://github.com/nf-core/raredisease/pull/621) - Allow for VEP version 112 to be 
used and set it to default [#617](https://github.com/nf-core/raredisease/pull/617) From d4f60d99fce55afa3dec190f5706e1ff57126caa Mon Sep 17 00:00:00 2001 From: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> Date: Wed, 16 Oct 2024 19:29:25 +0200 Subject: [PATCH 4/4] update schema --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index bd456728..74d58d05 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -698,7 +698,7 @@ "type": "string", "format": "path", "fa_icon": "fas fa-align-center", - "description": "Interval in the reference that will be used in the software" + "description": "Interval in the reference that will be used in the software. Used only by sentieon." }, "cnvnator_binsize": { "type": "integer",