diff --git a/configs/nextflow.hopper.config b/configs/nextflow.hopper.config index 1f8a14c..04fe350 100644 --- a/configs/nextflow.hopper.config +++ b/configs/nextflow.hopper.config @@ -88,9 +88,14 @@ params { // CONTAINERS // container_vep = "/fs1/resources/containers/ensembl-vep_release_111.0.sif" - container_fastp = '/fs1/resources/containers/fastp_0.23.4.sif' - container_sentieon = '/fs1/resources/containers/sentieon_202308.sif' + container_fastp = "/fs1/resources/containers/fastp_0.23.4.sif" + container_sentieon = "/fs1/resources/containers/sentieon_202308.sif" container_d4tools = "/fs1/resources/containers/d4tools_0.3.8.sif" + container_genmod = "/fs1/resources/containers/genmod_v3.9.sif" + container_bcftools = "/fs1/resources/containers/bcftools_1.20.sif" + + genmod_compound_trio_threshold = 12 + genmod_compound_trio_penalty = 8 run_chanjo2 = true reanalyze = false diff --git a/container/genmod.patch b/container/genmod.patch deleted file mode 100644 index 68a3eba..0000000 --- a/container/genmod.patch +++ /dev/null @@ -1,24 +0,0 @@ ---- compound_scorer.py 2019-09-12 12:39:42.842725942 +0200 -+++ /home/bjorn/miniconda2/lib/python2.7/site-packages/genmod/score_variants/compound_scorer.py 2019-09-12 08:29:05.600059286 +0200 -@@ -141,18 +141,18 @@ - - logger.debug("Checking compounds for family {0}".format( - compound_family_id)) -- -+ - #Loop through compounds to check if they are only low scored - for compound_id in compound_list: - compound_rank_score = rank_scores[compound_id] -- if compound_rank_score > 9: -+ if compound_rank_score > 12: - only_low = False - logger.debug("Setting only_low to {0}".format(only_low)) - - if (correct_score and only_low): - logger.debug("correcting rank score for {0}".format( - variant_id)) -- current_rank_score -= 6 -+ current_rank_score -= 8 - - for compound_id in compound_list: - logger.debug("Checking compound {0}".format(compound_id)) diff --git a/container/genmod/Singularity b/container/genmod/Singularity deleted file mode 
100644 index b62471b..0000000 --- a/container/genmod/Singularity +++ /dev/null @@ -1,25 +0,0 @@ -Bootstrap:docker -From:nfcore/base:1.8 - -%labels - MAINTAINER Viktor henmyr - DESCRIPTION Singularity container for CMD WGS pipeline - VERSION 0.0.2 - -%environment - PATH=/opt/conda/envs/CMD-WGS/bin:$PATH - umask 0002 - -%files - genmod.patch / - environment.yml / - -%post - rm -rf /var/lib/apt/lists/* - apt -y clean - apt -y update - apt -y install libz-dev build-essential gettext cmake libxml2-dev libcurl4-openssl-dev libssl-dev make libbz2-dev cpanminus - - pip install genmod - patch /opt/conda/lib/python3.7/site-packages/genmod/score_variants/compound_scorer.py /genmod.patch - /opt/conda/bin/conda env create -f /environment.yml \ No newline at end of file diff --git a/container/genmod/environment.yml b/container/genmod/environment.yml deleted file mode 100644 index 231263b..0000000 --- a/container/genmod/environment.yml +++ /dev/null @@ -1,14 +0,0 @@ -name: CMD-WGS -channels: - - bioconda - - conda-forge - - defaults - -dependencies: - - bioconda::bcftools=1.9 - - bioconda::tabix=0.2.6 - - - - - diff --git a/container/genmod/genmod.patch b/container/genmod/genmod.patch deleted file mode 100644 index 68a3eba..0000000 --- a/container/genmod/genmod.patch +++ /dev/null @@ -1,24 +0,0 @@ ---- compound_scorer.py 2019-09-12 12:39:42.842725942 +0200 -+++ /home/bjorn/miniconda2/lib/python2.7/site-packages/genmod/score_variants/compound_scorer.py 2019-09-12 08:29:05.600059286 +0200 -@@ -141,18 +141,18 @@ - - logger.debug("Checking compounds for family {0}".format( - compound_family_id)) -- -+ - #Loop through compounds to check if they are only low scored - for compound_id in compound_list: - compound_rank_score = rank_scores[compound_id] -- if compound_rank_score > 9: -+ if compound_rank_score > 12: - only_low = False - logger.debug("Setting only_low to {0}".format(only_low)) - - if (correct_score and only_low): - logger.debug("correcting rank score for {0}".format( - 
variant_id)) -- current_rank_score -= 6 -+ current_rank_score -= 8 - - for compound_id in compound_list: - logger.debug("Checking compound {0}".format(compound_id)) diff --git a/docs/list_of_all_used_software.md b/docs/list_of_all_used_software.md index 172e493..b2c3a5e 100644 --- a/docs/list_of_all_used_software.md +++ b/docs/list_of_all_used_software.md @@ -13,7 +13,7 @@ | fastp | 0.23.4 | Open-Source | https://github.com/OpenGene/fastp | https://github.com/OpenGene/fastp/issues | Sailendra Pradhananga | FASTQ preprocessor | | freebayes | 1.3.2 | Open-Source | https://github.com/freebayes/freebayes | https://github.com/freebayes/freebayes/issues | Ram Sai Nanduri | Bayesian haplotype-based genetic polymorphism discovery and genotyping | | gatk | 4.1.9.0 | Open-Source | https://gatk.broadinstitute.org/hc/en-us | https://github.com/broadinstitute/gatk/issues | Viktor Henmyr | Wide set of tools for genomic analysis | -| genmod | 3.7.4 | Open-Source | https://github.com/Clinical-Genomics/genmod | https://github.com/Clinical-Genomics/genmod/issues | Viktor Henmyr | Annotate models of genetic inheritance patterns in variant files | +| genmod | 3.9.0 | Open-Source | https://github.com/Clinical-Genomics/genmod | https://github.com/Clinical-Genomics/genmod/issues | Viktor Henmyr | Annotate models of genetic inheritance patterns in variant files | | haplogrep | 2.2.8 | Open-Source | https://github.com/seppinho/haplogrep-cmd | https://github.com/seppinho/haplogrep-cmd/issues | Paul Piccinelli | mtDNA haplogroup classification. Supporting rCRS and RSRS. 
| | hmtnote | 0.7.2 | Open-Source | https://github.com/robertopreste/HmtNote | https://github.com/robertopreste/HmtNote/issues | Paul Piccinelli | Human mitochondrial variants annotation using HmtVar | | madeleine | 2.0 | Open-Source | https://github.com/piratical/Madeline_2.0_PDE | https://github.com/piratical/Madeline_2.0_PDE/issues | Jakob Willforss | Pedigree drawing program | diff --git a/main.nf b/main.nf index 31f0891..ede7855 100644 --- a/main.nf +++ b/main.nf @@ -2175,16 +2175,52 @@ def calculate_indel_cadd_version(task) { """ } +process bgzip_indel_cadd { + cpus 4 + tag "$group" + memory '1 GB' + time '5m' + container = "${params.container_bcftools}" + + input: + set group, file(cadd_scores) from indel_cadd + + output: + set group, file("cadd.gz"), file("cadd.gz.tbi") into indel_cadd_bgzip + + script: + """ + gunzip -c ${cadd_scores} > cadd + bgzip -@ ${task.cpus} cadd + tabix -p vcf cadd.gz + """ + + stub: + """ + touch cadd.gz cadd.gz.tbi + ${bgzip_indel_cadd_version(task)} + """ +} +def bgzip_indel_cadd_version(task) { + """ + cat <<-END_VERSIONS > ${task.process}_versions.yml + ${task.process}: + tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//') + bcftools: \$(echo \$(bcftools --version 2>&1) | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} + // Add the calculated indel CADDs to the vcf process add_cadd_scores_to_vcf { cpus 4 tag "$group" memory '1 GB' time '5m' - container = '/fs1/resources/containers/genmod.sif' + container = "${params.container_genmod}" input: - set group, file(vcf), file(cadd_scores) from splice_marked.join(indel_cadd) + set group, file(vcf), file(cadd_scores), file(cadd_scores_tbi) from splice_marked.join(indel_cadd_bgzip) output: set group, file("${group}.cadd.vcf") into ma_vcf, fa_vcf, base_vcf @@ -2192,10 +2228,7 @@ process add_cadd_scores_to_vcf { script: """ - gunzip -c $cadd_scores > cadd - bgzip -@ ${task.cpus} cadd - tabix -p vcf cadd.gz - genmod annotate 
--cadd-file cadd.gz $vcf > ${group}.cadd.vcf + genmod annotate --cadd-file ${cadd_scores} ${vcf} > ${group}.cadd.vcf ${add_cadd_scores_to_vcf_version(task)} """ @@ -2210,10 +2243,8 @@ def add_cadd_scores_to_vcf_version(task) { """ cat <<-END_VERSIONS > ${task.process}_versions.yml ${task.process}: - tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//') genmod: \$(echo \$(genmod --version 2>&1) | sed -e "s/^.*genmod version: //") END_VERSIONS - """ } @@ -2227,7 +2258,7 @@ process inher_models { // scratch true // stageInMode 'copy' // stageOutMode 'copy' - container = '/fs1/resources/containers/genmod.sif' + container = "${params.container_genmod}" input: set group, file(vcf), type, file(ped) from base_vcf.mix(ma_vcf, fa_vcf).join(ped_inher.mix(ped_inher_ma,ped_inher_fa)) @@ -2266,7 +2297,7 @@ process genmodscore { tag "$group" memory '20 GB' time '1h' - container = '/fs1/resources/containers/genmod.sif' + container = "${params.container_genmod}" input: set group, type, file(vcf) from inhermod @@ -2284,7 +2315,11 @@ process genmodscore { if ( mode == "family" && params.antype == "wgs" ) { """ genmod score -i $group_score -c $params.rank_model -r $vcf -o ${group_score}.score1.vcf - genmod compound ${group_score}.score1.vcf > ${group_score}.score2.vcf + genmod compound \ + --threshold ${params.genmod_compound_trio_threshold} \ + --penalty ${params.genmod_compound_trio_penalty} \ + -o ${group_score}.score2.vcf \ + ${group_score}.score1.vcf sed 's/RankScore=${group}:/RankScore=${group_score}:/g' -i ${group_score}.score2.vcf genmod sort -p -f $group_score ${group_score}.score2.vcf -o ${group_score}.scored.vcf @@ -2294,7 +2329,18 @@ process genmodscore { else { """ genmod score -i $group_score -c $params.rank_model_s -r $vcf -o ${group_score}.score1.vcf - genmod sort -p -f $group_score ${group_score}.score1.vcf -o ${group_score}.scored.vcf + + # To get compounds without applying rank score penalty + genmod compound \ + --penalty 0 \ + -o 
${group_score}.score1.with_compounds.vcf \ + ${group_score}.score1.vcf + + genmod sort \ + -p \ + -f $group_score \ + -o ${group_score}.scored.vcf \ + ${group_score}.score1.with_compounds.vcf ${genmodscore_version(task)} """ @@ -3685,52 +3731,33 @@ process score_sv { publishDir "${OUTDIR}/vcf", mode: 'copy', overwrite: 'true', pattern: '*.vcf.gz*' memory '10 GB' time '2h' - container = '/fs1/resources/containers/genmod.sif' + container = "${params.container_genmod}" input: set group, type, file(vcf) from annotatedSV output: - set group, type, file("${group_score}.sv.scored.sorted.vcf.gz"), file("${group_score}.sv.scored.sorted.vcf.gz.tbi") into sv_rescore,sv_rescore_ma,sv_rescore_fa - set group, file("${group}_sv.INFO") into sv_INFO - set group, file("${group_score}.sv.scored.sorted.vcf.gz") into svvcf_bed, svvcf_pod + set group, type, val(group_score), file("${group_score}.sv.scored.vcf") into ch_scored_sv set group, file("*versions.yml") into ch_score_sv_versions script: - group_score = group - if ( type == "ma" || type == "fa") { + def model = (mode == "family" && params.antype == "wgs") ? params.svrank_model : params.svrank_model_s
+ // No 'def' on group_score: the output block must be able to resolve it + group_score = group + if ( type == "ma" || type == "fa" ) { group_score = group + "_" + type } - if (mode == "family" && params.antype == "wgs") { - """ - genmod score -i $group_score -c $params.svrank_model -r $vcf -o ${group_score}.sv.scored_tmp.vcf - bcftools sort -O v -o ${group_score}.sv.scored.sorted.vcf ${group_score}.sv.scored_tmp.vcf - bgzip -@ ${task.cpus} ${group_score}.sv.scored.sorted.vcf -f - tabix ${group_score}.sv.scored.sorted.vcf.gz -f - echo "SV $type ${params.accessdir}/vcf/${group_score}.sv.scored.sorted.vcf.gz" > ${group}_sv.INFO - - ${score_sv_version(task)} - """ - } - else { - """ - genmod score -i $group_score -c $params.svrank_model_s -r $vcf -o ${group_score}.sv.scored.vcf - bcftools sort -O v -o ${group_score}.sv.scored.sorted.vcf ${group}.sv.scored.vcf - bgzip -@ ${task.cpus} ${group_score}.sv.scored.sorted.vcf -f - tabix ${group_score}.sv.scored.sorted.vcf.gz -f - echo "SV $type ${params.accessdir}/vcf/${group_score}.sv.scored.sorted.vcf.gz" > ${group}_sv.INFO + """ + genmod score -i ${group_score} -c ${model} -r ${vcf} -o ${group_score}.sv.scored.vcf - ${score_sv_version(task)} - """ - } + ${score_sv_version(task)} + """ stub: group_score = group """ - touch "${group_score}.sv.scored.sorted.vcf.gz" - touch "${group_score}.sv.scored.sorted.vcf.gz.tbi" - touch "${group}_sv.INFO" + touch "${group_score}.sv.scored.vcf" ${score_sv_version(task)} """ @@ -3739,8 +3766,51 @@ def score_sv_version(task) { """ cat <<-END_VERSIONS > ${task.process}_versions.yml ${task.process}: - tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//') genmod: \$(echo \$(genmod --version 2>&1) | sed -e "s/^.*genmod version: //") + END_VERSIONS + """ +} + +process bgzip_scored_genmod { + publishDir "${OUTDIR}/vcf", mode: 'copy', overwrite: 'true', pattern: '*.vcf.gz*' + cpus 4 + tag "$group" + memory '1 GB' + time '5m' + container = "${params.container_bcftools}" + + input: + set group, type, val(group_score), file(scored_sv_vcf) from ch_scored_sv 
+ + output: + set group, type, file("${group_score}.sv.scored.sorted.vcf.gz"), file("${group_score}.sv.scored.sorted.vcf.gz.tbi") into sv_rescore, sv_rescore_ma, sv_rescore_fa + set group, file("${group_score}.sv.scored.sorted.vcf.gz") into svvcf_bed, svvcf_pod + set group, file("${group}_sv.INFO") into sv_INFO + set group, file("*versions.yml") into ch_score_sv_bgzip_versions + + script: + """ + bcftools sort -O v -o ${group_score}.sv.scored.sorted.vcf ${scored_sv_vcf} + bgzip -@ ${task.cpus} ${group_score}.sv.scored.sorted.vcf -f + tabix ${group_score}.sv.scored.sorted.vcf.gz -f + echo "SV\t$type\t${params.accessdir}/vcf/${group_score}.sv.scored.sorted.vcf.gz" > ${group}_sv.INFO + + ${bgzip_score_sv_version(task)} + """ + stub: + """ + touch "${group_score}.sv.scored.sorted.vcf.gz" + touch "${group_score}.sv.scored.sorted.vcf.gz.tbi" + touch "${group}_sv.INFO" + + ${bgzip_score_sv_version(task)} + """ +} +def bgzip_score_sv_version(task) { + """ + cat <<-END_VERSIONS > ${task.process}_versions.yml + ${task.process}: + tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//') bcftools: \$(echo \$(bcftools --version 2>&1) | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """