diff --git a/CHANGELOG.md b/CHANGELOG.md
index 82508df..45b011c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 # CHANGELOG
 
+### 3.11.1
+* Add a process to get contamination values from verifybamid2 software.
+* Update configs/nextflow.hopper.config with a specific verifybamid2 container.
+* Update configs/nextflow.hopper.config with specific SVDPrefix files for panel and wgs.
+
 ### 3.10.4
 * Added --format vcf to `vep_sv` to fix for cases where vcf file carries no variants.
 
diff --git a/configs/nextflow.hopper.config b/configs/nextflow.hopper.config
index 1f8a14c..b5bacb3 100644
--- a/configs/nextflow.hopper.config
+++ b/configs/nextflow.hopper.config
@@ -87,10 +87,11 @@ params {
 	queue = 'normal'
 
 	// CONTAINERS //
-	container_vep      = "/fs1/resources/containers/ensembl-vep_release_111.0.sif"
-	container_fastp    = '/fs1/resources/containers/fastp_0.23.4.sif'
-	container_sentieon = '/fs1/resources/containers/sentieon_202308.sif'
-	container_d4tools  = "/fs1/resources/containers/d4tools_0.3.8.sif"
+	container_vep          = "/fs1/resources/containers/ensembl-vep_release_111.0.sif"
+	container_fastp        = '/fs1/resources/containers/fastp_0.23.4.sif'
+	container_sentieon     = '/fs1/resources/containers/sentieon_202308.sif'
+	container_d4tools      = "/fs1/resources/containers/d4tools_0.3.8.sif"
+	container_verifybamid2 = "/fs1/resources/containers/verifybamid2_2.0.1.sif"
 
 	run_chanjo2 = true
 	reanalyze = false
@@ -147,6 +148,7 @@ profiles {
 		params.gatkreffolders = "${params.refpath}/gatk_cnv/cnvref/masked_hg38/gatk_ref"
 		params.gens_accessdir = "/access/wgs/plot_data"
 		params.accessdir = params.loaddir + "/wgs/"
+		params.verifybamid2_svdprefix = "/fs1/resources/ref/hg38/VerifyBamID/resource/no_chr/1000g.phase3.100k.b38.vcf.gz.dat"
 	}
 
 	oncov1 {
@@ -193,6 +195,7 @@ profiles {
 		params.gatkreffolders = "${params.refpath}/annotation_dbs/onco-solid/gatk_ref/gatk_ref_oncov1-0"
 		params.gens_accessdir = "/access/twist-onco/plot_data"
 		params.accessdir = params.loaddir + "/twist-onco/"
+		params.verifybamid2_svdprefix = "/fs1/resources/ref/hg38/VerifyBamID/resource/exome/no_chr/1000g.phase3.10k.b38.exome.vcf.gz.dat"
 	}
 
 	onco {
@@ -237,6 +240,7 @@ profiles {
 		params.gatkreffolders = "${params.refpath}/annotation_dbs/onco-solid/gatk_ref/gatk_ref_oncov2-0"
 		params.gens_accessdir = "/access/twist-onco/plot_data"
 		params.accessdir = "/access/twist-onco/"
+		params.verifybamid2_svdprefix = "/fs1/resources/ref/hg38/VerifyBamID/resource/exome/no_chr/1000g.phase3.10k.b38.exome.vcf.gz.dat"
 	}
 
 	exome {
@@ -276,6 +280,7 @@ exome {
 		params.gatkreffolders = "/fs1/viktor/gatk_ref/gatk_ref"
 		params.gens_accessdir = "/access/wgs/plot_data"
 		params.accessdir = "/access/exome/"
+		params.verifybamid2_svdprefix = "/fs1/resources/ref/hg38/VerifyBamID/resource/exome/no_chr/1000g.phase3.10k.b38.exome.vcf.gz.dat"
 	}
 
 	myeloid {
@@ -316,6 +321,7 @@ myeloid {
 		params.gatkreffolders = "${params.refpath}/gatk_cnv/onco/cnvref/gatk_ref"
 		params.gens_accessdir = "/access/myeloid_const/plot_data"
 		params.accessdir = "/access/myeloid_const/"
+		params.verifybamid2_svdprefix = "/fs1/resources/ref/hg38/VerifyBamID/resource/exome/no_chr/1000g.phase3.10k.b38.exome.vcf.gz.dat"
 	}
 
 	modycf {
@@ -360,6 +366,7 @@ modycf {
 		params.gatkreffolders = "${params.refpath}/annotation_dbs/modycf/gatk_ref/masked/gatk_ref_modycf_masked"
 		params.gens_accessdir = "/access/wgs/plot_data"
 		params.accessdir = "/access/modycf/"
+		params.verifybamid2_svdprefix = "/fs1/resources/ref/hg38/VerifyBamID/resource/exome/no_chr/1000g.phase3.10k.b38.exome.vcf.gz.dat"
 	}
 
 }
diff --git a/docs/list_of_all_used_software.md b/docs/list_of_all_used_software.md
index 172e493..dced8d3 100644
--- a/docs/list_of_all_used_software.md
+++ b/docs/list_of_all_used_software.md
@@ -36,4 +36,3 @@
 | vcflib | 1.0.9 | Open-Source | https://github.com/vcflib/vcflib | https://github.com/vcflib/vcflib/issues | Alexander Koc | Parse and manipulate VCF files with python and zig bindings |
 | vep | 111.0 | Open-Source | https://www.ensembl.org/info/docs/tools/vep/index.html | https://github.com/Ensembl/ensembl-vep/issues | Ram Sai Nanduri | Predict functional effects of genomic variants |
 | verifybamid2 | 2.0.1 | Open-Source | https://github.com/Griffan/VerifyBamID | https://github.com/Griffan/VerifyBamID/issues | Paul Piccinelli | Detecting and estimating inter-sample DNA contamination |
-
diff --git a/main.nf b/main.nf
index 31f0891..fd510c2 100644
--- a/main.nf
+++ b/main.nf
@@ -107,7 +107,8 @@ bam_choice.into{
 	bam_qc_choice;
 	dedup_dummy_choice;
 	bam_bqsr_choice;
-	bam_gatk_choice }
+	bam_gatk_choice;
+	verifybamid2_bam_choice }
 
 // bqsr expects sample_id to come first, instead of group_id
 bam_bqsr_choice.map {
@@ -399,7 +400,7 @@ process markdup {
 		set id, group, file(bam), file(bai) from bam_markdup.mix(merged_bam_dedup)
 
 	output:
-		set group, id, file("${id}_dedup.bam"), file("${id}_dedup.bam.bai") into complete_bam, chanjo_bam, d4_bam, expansionhunter_bam, yaml_bam, cov_bam, bam_manta, bam_nator, bam_tiddit, bam_manta_panel, bam_delly_panel, bam_cnvkit_panel, bam_freebayes, bam_mito, smncnc_bam, bam_gatk, depth_onco
+		set group, id, file("${id}_dedup.bam"), file("${id}_dedup.bam.bai") into complete_bam, chanjo_bam, d4_bam, verifybamid2_bam, expansionhunter_bam, yaml_bam, cov_bam, bam_manta, bam_nator, bam_tiddit, bam_manta_panel, bam_delly_panel, bam_cnvkit_panel, bam_freebayes, bam_mito, smncnc_bam, bam_gatk, depth_onco
 		set id, group, file("${id}_dedup.bam"), file("${id}_dedup.bam.bai") into qc_bam, bam_melt, bam_bqsr
 		set val(id), file("dedup_metrics.txt") into dedupmet_sentieonqc
 		set group, file("${group}_bam.INFO") into bam_INFO
@@ -646,6 +647,57 @@ def d4_coverage_version(task) {
 	"""
 }
 
+// Estimate DNA contamination for each deduplicated BAM with VerifyBamID2.
+// Outputs <id>.result.selfSM and <id>.result.Ancestry; publishing is currently disabled.
+process verifybamid2 {
+	cpus 16
+	memory '10 GB'
+	// publishDir "${OUTDIR}/contamination", mode: 'copy', overwrite: 'true', pattern: '*.selfSM'
+	tag "$id"
+	container = "${params.container_verifybamid2}"
+
+	input:
+		set group, id, file(bam), file(bai) from verifybamid2_bam
+
+	output:
+		file("${id}.result.selfSM")
+		file("${id}.result.Ancestry")
+		set group, file("*versions.yml") into ch_verifybamid2_versions
+
+	script:
+		// The marker sanity check is kept for wgs and disabled for every other analysis type.
+		def sanity_check_flag = params.antype == "wgs" ? "" : "--DisableSanityCheck"
+		"""
+		verifybamid2 ${sanity_check_flag} \
+			--NumThread ${task.cpus} \
+			--SVDPrefix ${params.verifybamid2_svdprefix} \
+			--Reference ${genome_file} \
+			--BamFile ${bam}
+
+		mv result.selfSM ${id}.result.selfSM
+		mv result.Ancestry ${id}.result.Ancestry
+		${verifybamid2_version(task)}
+		"""
+
+	stub:
+		"""
+		touch "${id}.result.selfSM"
+		touch "${id}.result.Ancestry"
+
+		${verifybamid2_version(task)}
+		"""
+}
+
+// Returns the shell snippet that records the VerifyBamID2 version in <process>_versions.yml.
+def verifybamid2_version(task) {
+	"""
+	cat <<-END_VERSIONS > ${task.process}_versions.yml
+	${task.process}:
+	    VerifyBamID2: \$( echo \$( verifybamid2 --help 2>&1 | grep Version ) | sed "s/^.*Version://" )
+	END_VERSIONS
+	"""
+}
+
 // Calculate coverage for paneldepth
 process depth_onco {
 	cpus 2
@@ -3931,6 +3983,7 @@ process combine_versions {
 		ch_bqsr_versions.first(),
 		ch_sentieon_qc_versions.first(),
 		ch_d4_coverage_versions.first(),
+		ch_verifybamid2_versions.first(),
 		ch_smn_copy_number_caller_versions.first(),
 		ch_expansionhunter_versions.first(),
 		ch_stranger_versions.first(),