Skip to content

Commit

Permalink
Merge pull request #633 from nf-core/deepvar-for-regions
Browse files Browse the repository at this point in the history
add bait regions to deepvariant for WES
  • Loading branch information
ramprasadn authored Oct 16, 2024
2 parents f94e6e2 + 7e0cad7 commit cdf7263
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 43 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### `Fixed`

- Restrict deepvariant analysis of WES samples to bait regions [#633](https://github.com/nf-core/raredisease/pull/633)
- bcftools annotate declaration in annotate CADD subworkflow [#624](https://github.com/nf-core/raredisease/pull/624)
- Rhocallviz subworkflow will only be invoked once per sample [#621](https://github.com/nf-core/raredisease/pull/621)
- Allow for VEP version 112 to be used and set it to default [#617](https://github.com/nf-core/raredisease/pull/617)
Expand Down
2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@
"type": "string",
"format": "path",
"fa_icon": "fas fa-align-center",
"description": "Interval in the reference that will be used in the software"
"description": "Interval in the reference that will be used in the software. Used only by sentieon."
},
"cnvnator_binsize": {
"type": "integer",
Expand Down
2 changes: 2 additions & 0 deletions subworkflows/local/call_snv.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ workflow CALL_SNV {
ch_dbsnp // channel: [optional] [ val(meta), path(vcf) ]
ch_dbsnp_tbi // channel: [optional] [ val(meta), path(tbi) ]
ch_call_interval // channel: [mandatory] [ path(intervals) ]
ch_bait_intervals // channel: [mandatory] [ path(intervals) ]
ch_ml_model // channel: [mandatory] [ path(model) ]
ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
Expand All @@ -54,6 +55,7 @@ workflow CALL_SNV {
ch_genome_bam_bai,
ch_genome_fasta,
ch_genome_fai,
ch_bait_intervals,
ch_par_bed,
ch_case_info,
ch_foundin_header,
Expand Down
76 changes: 38 additions & 38 deletions subworkflows/local/prepare_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ include { UNTAR as UNTAR_VEP_CACHE } from '../../modul

workflow PREPARE_REFERENCES {
take:
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(fai) ]
ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
ch_gnomad_af_tab // channel: [optional; used in for snv annotation] [ val(meta), path(tab) ]
ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
ch_target_bed // channel: [mandatory for WES] [ path(bed) ]
ch_vcfanno_extra_unprocessed // channel: [mandatory] [ val(meta), path(vcf) ]
ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_genome_dictionary // channel: [mandatory] [ val(meta), path(dict) ]
ch_mt_fasta // channel: [mandatory for dedicated mt analysis] [ val(meta), path(fasta) ]
ch_gnomad_af_tab // channel: [optional; used for snv annotation] [ val(meta), path(tab) ]
ch_known_dbsnp // channel: [optional; used only by sentieon] [ val(meta), path(vcf) ]
ch_target_bed // channel: [mandatory for WES] [ path(bed) ]
ch_vcfanno_extra_unprocessed // channel: [mandatory] [ val(meta), path(vcf) ]
ch_vep_cache // channel: [mandatory for annotation] [ path(cache) ]

main:
ch_versions = Channel.empty()
Expand Down Expand Up @@ -170,34 +170,34 @@ workflow PREPARE_REFERENCES {
ch_versions = ch_versions.mix(RTGTOOLS_FORMAT.out.versions)

emit:
genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ]
genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
genome_dict = ch_dict // channel: [ val(meta), path(dict) ]
sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val (meta), path(intervals) ]
mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ]
mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), path(index) ]
mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ]
mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ]
mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ]
mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ]
mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]

gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(fasta) ]
target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ]
bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect() // channel: [ path(intervals) ]
target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
versions = ch_versions // channel: [ path(versions.yml) ]
genome_bwa_index = Channel.empty().mix(ch_bwa, ch_sentieonbwa).collect() // channel: [ val(meta), path(index) ]
genome_bwamem2_index = BWAMEM2_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
genome_bwameme_index = BWAMEME_INDEX_GENOME.out.index.collect() // channel: [ val(meta), path(index) ]
genome_chrom_sizes = GET_CHROM_SIZES.out.sizes.collect() // channel: [ path(sizes) ]
genome_fai = ch_fai // channel: [ val(meta), path(fai) ]
genome_dict = ch_dict // channel: [ val(meta), path(dict) ]
sdf = RTGTOOLS_FORMAT.out.sdf // channel: [ val(meta), path(sdf) ]
mt_intervals = ch_shiftfasta_mtintervals.intervals.collect() // channel: [ path(intervals) ]
mt_bwa_index = ch_bwa_mt // channel: [ val(meta), path(index) ]
mt_bwamem2_index = BWAMEM2_INDEX_MT.out.index.collect() // channel: [ val(meta), path(index) ]
mt_dict = GATK_SD_MT.out.dict.collect() // channel: [ val(meta), path(dict) ]
mt_fasta = ch_mt_fasta_in.collect() // channel: [ val(meta), path(fasta) ]
mt_fai = SAMTOOLS_FAIDX_MT.out.fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_intervals = ch_shiftfasta_mtintervals.shift_intervals.collect() // channel: [ path(intervals) ]
mtshift_backchain = GATK_SHIFTFASTA.out.shift_back_chain.collect() // channel: [ val(meta), path(backchain) ]
mtshift_dict = GATK_SHIFTFASTA.out.dict // channel: [ val(meta), path(dict) ]
mtshift_fai = GATK_SHIFTFASTA.out.shift_fai.collect() // channel: [ val(meta), path(fai) ]
mtshift_fasta = GATK_SHIFTFASTA.out.shift_fa.collect() // channel: [ val(meta), path(fasta) ]
mtshift_bwa_index = ch_bwa_mtshift // channel: [ val(meta), path(index) ]
mtshift_bwamem2_index = BWAMEM2_INDEX_MT_SHIFT.out.index.collect() // channel: [ val(meta), path(index) ]

gnomad_af_idx = TABIX_GNOMAD_AF.out.tbi.collect() // channel: [ val(meta), path(tbi) ]
known_dbsnp_tbi = TABIX_DBSNP.out.tbi.collect() // channel: [ val(meta), path(tbi) ]
target_bed = Channel.empty().mix(ch_tbi, ch_bgzip_tbi).collect() // channel: [ val(meta), path(bed), path(tbi) ]
vcfanno_extra = ch_vcfanno_extra.ifEmpty([[]]) // channel: [ [path(vcf), path(tbi)] ]
bait_intervals = CAT_CAT_BAIT.out.file_out.map{ meta, inter -> inter}.collect().ifEmpty([[]]) // channel: [ path(intervals) ]
target_intervals = GATK_BILT.out.interval_list.map{ meta, inter -> inter}.collect() // channel: [ path(interval_list) ]
vep_resources = UNTAR_VEP_CACHE.out.untar.map{meta, files -> [files]}.collect() // channel: [ path(cache) ]
versions = ch_versions // channel: [ path(versions.yml) ]

}
15 changes: 11 additions & 4 deletions subworkflows/local/variant_calling/call_snv_deepvariant.nf
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ workflow CALL_SNV_DEEPVARIANT {
ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ]
ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ]
ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_bait_intervals // channel: [mandatory] [ path(intervals) ]
ch_par_bed // channel: [optional] [ val(meta), path(bed) ]
ch_case_info // channel: [mandatory] [ val(case_info) ]
ch_foundin_header // channel: [mandatory] [ path(header) ]
Expand All @@ -24,10 +25,16 @@ workflow CALL_SNV_DEEPVARIANT {
main:
ch_versions = Channel.empty()

ch_bam_bai.map { meta, bam, bai ->
return [meta, bam, bai, []]
}
.set { ch_deepvar_in }
if (params.analysis_type.equals("wes")) {
ch_bam_bai
.combine (ch_bait_intervals)
.set { ch_deepvar_in }
} else if (params.analysis_type.equals("wgs")) {
ch_bam_bai
.map { meta, bam, bai ->
return [meta, bam, bai, []] }
.set { ch_deepvar_in }
}

DEEPVARIANT ( ch_deepvar_in, ch_genome_fasta, ch_genome_fai, [[],[]], ch_par_bed )
DEEPVARIANT.out.gvcf
Expand Down
1 change: 1 addition & 0 deletions workflows/raredisease.nf
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ workflow RAREDISEASE {
ch_dbsnp,
ch_dbsnp_tbi,
ch_call_interval,
ch_bait_intervals,
ch_ml_model,
ch_par_bed,
ch_case_info,
Expand Down

0 comments on commit cdf7263

Please sign in to comment.