Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Genmod fixes #247

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions configs/nextflow.hopper.config
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,14 @@ params {

// CONTAINERS //
container_vep = "/fs1/resources/containers/ensembl-vep_release_111.0.sif"
container_fastp = '/fs1/resources/containers/fastp_0.23.4.sif'
container_sentieon = '/fs1/resources/containers/sentieon_202308.sif'
container_fastp = "/fs1/resources/containers/fastp_0.23.4.sif"
container_sentieon = "/fs1/resources/containers/sentieon_202308.sif"
container_d4tools = "/fs1/resources/containers/d4tools_0.3.8.sif"
container_genmod = "/fs1/resources/containers/genmod_v3.9.sif"
container_bcftools = "/fs1/resources/containers/bcftools_1.20.sif"

genmod_compound_trio_threshold = 12
genmod_compound_trio_penalty = 8

run_chanjo2 = true
reanalyze = false
Expand Down
24 changes: 0 additions & 24 deletions container/genmod.patch

This file was deleted.

25 changes: 0 additions & 25 deletions container/genmod/Singularity

This file was deleted.

14 changes: 0 additions & 14 deletions container/genmod/environment.yml

This file was deleted.

24 changes: 0 additions & 24 deletions container/genmod/genmod.patch

This file was deleted.

2 changes: 1 addition & 1 deletion docs/list_of_all_used_software.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
| fastp | 0.23.4 | Open-Source | https://github.com/OpenGene/fastp | https://github.com/OpenGene/fastp/issues | Sailendra Pradhananga | FASTQ preprocessor |
| freebayes | 1.3.2 | Open-Source | https://github.com/freebayes/freebayes | https://github.com/freebayes/freebayes/issues | Ram Sai Nanduri | Bayesian haplotype-based genetic polymorphism discovery and genotyping |
| gatk | 4.1.9.0 | Open-Source | https://gatk.broadinstitute.org/hc/en-us | https://github.com/broadinstitute/gatk/issues | Viktor Henmyr | Wide set of tools for genomic analysis |
| genmod | 3.7.4 | Open-Source | https://github.com/Clinical-Genomics/genmod | https://github.com/Clinical-Genomics/genmod/issues | Viktor Henmyr | Annotate models of genetic inheritance patterns in variant files |
| genmod | 3.9.0 | Open-Source | https://github.com/Clinical-Genomics/genmod | https://github.com/Clinical-Genomics/genmod/issues | Viktor Henmyr | Annotate models of genetic inheritance patterns in variant files |
| haplogrep | 2.2.8 | Open-Source | https://github.com/seppinho/haplogrep-cmd | https://github.com/seppinho/haplogrep-cmd/issues | Paul Piccinelli | mtDNA haplogroup classification. Supporting rCRS and RSRS. |
| hmtnote | 0.7.2 | Open-Source | https://github.com/robertopreste/HmtNote | https://github.com/robertopreste/HmtNote/issues | Paul Piccinelli | Human mitochondrial variants annotation using HmtVar |
| madeleine | 2.0 | Open-Source | https://github.com/piratical/Madeline_2.0_PDE | https://github.com/piratical/Madeline_2.0_PDE/issues | Jakob Willforss | Pedigree drawing program |
Expand Down
155 changes: 112 additions & 43 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2175,27 +2175,60 @@ def calculate_indel_cadd_version(task) {
"""
}

// Re-compress the calculated indel CADD score file with bgzip and index it with
// tabix, so that downstream annotation can consume `cadd.gz` + `cadd.gz.tbi`
// directly without needing bgzip/tabix in its own container.
//
// Input : (group, gzip-compressed CADD score file) from `indel_cadd`
// Output: (group, cadd.gz, cadd.gz.tbi) into `indel_cadd_bgzip`
//         (group, *versions.yml) into `ch_bgzip_indel_cadd_versions`
process bgzip_indel_cadd {
    cpus 4
    tag "$group"
    memory '1 GB'
    time '5m'
    container = "${params.container_bcftools}"

    input:
        set group, file(cadd_scores) from indel_cadd

    output:
        set group, file("cadd.gz"), file("cadd.gz.tbi") into indel_cadd_bgzip
        set group, file("*versions.yml") into ch_bgzip_indel_cadd_versions

    script:
        """
        gunzip -c ${cadd_scores} > cadd
        bgzip -@ ${task.cpus} cadd
        tabix -p vcf cadd.gz

        ${bgzip_indel_cadd_version(task)}
        """

    stub:
        // The stub must create exactly the files declared under `output:`;
        // otherwise a stub run aborts with a missing-output error.
        """
        touch "cadd.gz"
        touch "cadd.gz.tbi"

        ${bgzip_indel_cadd_version(task)}
        """
}
// Build the shell snippet that records tool versions for the bgzip_indel_cadd task.
// The returned string is meant to be interpolated into a process script/stub; when
// executed it writes `${task.process}_versions.yml` in the task work directory with
// the tabix (htslib) and bcftools version strings parsed from `--version` output.
//
// task: the Nextflow task object; only `task.process` is read here.
def bgzip_indel_cadd_version(task) {
"""
cat <<-END_VERSIONS > ${task.process}_versions.yml
${task.process}:
tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//')
bcftools: \$(echo \$(bcftools --version 2>&1) | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
END_VERSIONS
"""
}

// Add the calculated indel CADDs to the vcf
process add_cadd_scores_to_vcf {
cpus 4
tag "$group"
memory '1 GB'
time '5m'
container = '/fs1/resources/containers/genmod.sif'
container = "${params.container_genmod}"

input:
set group, file(vcf), file(cadd_scores) from splice_marked.join(indel_cadd)
set group, file(vcf), file(cadd_scores), file(cadd_scores_tbi) from splice_marked.join(indel_cadd_bgzip)

output:
set group, file("${group}.cadd.vcf") into ma_vcf, fa_vcf, base_vcf
set group, file("*versions.yml") into ch_add_cadd_scores_to_vcf_versions

script:
"""
gunzip -c $cadd_scores > cadd
bgzip -@ ${task.cpus} cadd
tabix -p vcf cadd.gz
genmod annotate --cadd-file cadd.gz $vcf > ${group}.cadd.vcf
genmod annotate --cadd-file ${cadd_scores} ${vcf} > ${group}.cadd.vcf

${add_cadd_scores_to_vcf_version(task)}
"""
Expand All @@ -2210,10 +2243,8 @@ def add_cadd_scores_to_vcf_version(task) {
"""
cat <<-END_VERSIONS > ${task.process}_versions.yml
${task.process}:
tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//')
genmod: \$(echo \$(genmod --version 2>&1) | sed -e "s/^.*genmod version: //")
END_VERSIONS

"""
}

Expand All @@ -2227,7 +2258,7 @@ process inher_models {
// scratch true
// stageInMode 'copy'
// stageOutMode 'copy'
container = '/fs1/resources/containers/genmod.sif'
container = "${params.container_genmod}"

input:
set group, file(vcf), type, file(ped) from base_vcf.mix(ma_vcf, fa_vcf).join(ped_inher.mix(ped_inher_ma,ped_inher_fa))
Expand Down Expand Up @@ -2266,7 +2297,7 @@ process genmodscore {
tag "$group"
memory '20 GB'
time '1h'
container = '/fs1/resources/containers/genmod.sif'
container = "${params.container_genmod}"

input:
set group, type, file(vcf) from inhermod
Expand All @@ -2284,7 +2315,11 @@ process genmodscore {
if ( mode == "family" && params.antype == "wgs" ) {
"""
genmod score -i $group_score -c $params.rank_model -r $vcf -o ${group_score}.score1.vcf
genmod compound ${group_score}.score1.vcf > ${group_score}.score2.vcf
genmod compound \
--threshold ${params.genmod_compound_trio_threshold} \
--penalty ${params.genmod_compound_trio_penalty} \
-o ${group_score}.score2.vcf \
${group_score}.score1.vcf
sed 's/RankScore=${group}:/RankScore=${group_score}:/g' -i ${group_score}.score2.vcf
genmod sort -p -f $group_score ${group_score}.score2.vcf -o ${group_score}.scored.vcf

Expand All @@ -2294,7 +2329,18 @@ process genmodscore {
else {
"""
genmod score -i $group_score -c $params.rank_model_s -r $vcf -o ${group_score}.score1.vcf
genmod sort -p -f $group_score ${group_score}.score1.vcf -o ${group_score}.scored.vcf

# To get compounds without applying rank score penalty
# (this text lives inside the Bash script string, so it must use a shell
# comment; a Groovy-style comment marker here would be executed as a command)
genmod compound \
--penalty 0 \
-o ${group_score}.score1.with_compounds.vcf \
${group_score}.score1.vcf

genmod sort \
-p \
-f $group_score \
-o ${group_score}.scored.vcf \
${group_score}.score1.with_compounds.vcf

${genmodscore_version(task)}
"""
Expand Down Expand Up @@ -3685,52 +3731,33 @@ process score_sv {
publishDir "${OUTDIR}/vcf", mode: 'copy', overwrite: 'true', pattern: '*.vcf.gz*'
memory '10 GB'
time '2h'
container = '/fs1/resources/containers/genmod.sif'
container = "${params.container_genmod}"

input:
set group, type, file(vcf) from annotatedSV

output:
set group, type, file("${group_score}.sv.scored.sorted.vcf.gz"), file("${group_score}.sv.scored.sorted.vcf.gz.tbi") into sv_rescore,sv_rescore_ma,sv_rescore_fa
set group, file("${group}_sv.INFO") into sv_INFO
set group, file("${group_score}.sv.scored.sorted.vcf.gz") into svvcf_bed, svvcf_pod
set group, val(group_score), file(".sv.scored.vcf") into ch_scored_sv
set group, file("*versions.yml") into ch_score_sv_versions

script:
group_score = group
if ( type == "ma" || type == "fa") {
def model = (mode == "family" && params.antype == "wgs") ? params.svrank_model : params.svrank_model_s
def scoredVcfOutput = (mode == "family" && params.antype == "wgs") ? "${group_score}.sv.scored.vcf" : "${group_score}.sv.scored.vcf"
def group_score = group
if ( type == "ma" || type == "fa" ) {
group_score = group + "_" + type
}

if (mode == "family" && params.antype == "wgs") {
"""
genmod score -i $group_score -c $params.svrank_model -r $vcf -o ${group_score}.sv.scored_tmp.vcf
bcftools sort -O v -o ${group_score}.sv.scored.sorted.vcf ${group_score}.sv.scored_tmp.vcf
bgzip -@ ${task.cpus} ${group_score}.sv.scored.sorted.vcf -f
tabix ${group_score}.sv.scored.sorted.vcf.gz -f
echo "SV $type ${params.accessdir}/vcf/${group_score}.sv.scored.sorted.vcf.gz" > ${group}_sv.INFO

${score_sv_version(task)}
"""
}
else {
"""
genmod score -i $group_score -c $params.svrank_model_s -r $vcf -o ${group_score}.sv.scored.vcf
bcftools sort -O v -o ${group_score}.sv.scored.sorted.vcf ${group}.sv.scored.vcf
bgzip -@ ${task.cpus} ${group_score}.sv.scored.sorted.vcf -f
tabix ${group_score}.sv.scored.sorted.vcf.gz -f
echo "SV $type ${params.accessdir}/vcf/${group_score}.sv.scored.sorted.vcf.gz" > ${group}_sv.INFO
"""
genmod score -i ${group_score} -c ${model} -r ${vcf} -o ${group_score}.sv.scored.vcf

${score_sv_version(task)}
"""
}
${score_sv_version(task)}
"""

stub:
group_score = group
"""
touch "${group_score}.sv.scored.sorted.vcf.gz"
touch "${group_score}.sv.scored.sorted.vcf.gz.tbi"
touch "${group}_sv.INFO"
touch "${group_score}.sv.scored.sorted.vcf"

${score_sv_version(task)}
"""
Expand All @@ -3739,8 +3766,50 @@ def score_sv_version(task) {
"""
cat <<-END_VERSIONS > ${task.process}_versions.yml
${task.process}:
tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//')
genmod: \$(echo \$(genmod --version 2>&1) | sed -e "s/^.*genmod version: //")
END_VERSIONS
"""
}

// Sort the genmod-scored SV VCF with bcftools, compress it with bgzip and index it
// with tabix, then write a one-line `${group}_sv.INFO` record pointing at the
// compressed VCF under `${params.accessdir}/vcf/`.
// Runs in the bcftools container (`params.container_bcftools`).
process bgzip_scored_genmod {
cpus 4
tag "$group"
memory '1 GB'
time '5m'
container = "${params.container_bcftools}"

// NOTE(review): the input tuple binds only group, group_score and scored_sv_vcf,
// but `type` is referenced in the first output tuple and in the script's echo line.
// Confirm where `type` is expected to come from — it looks like it needs to be
// carried through ch_scored_sv by the upstream process.
input:
set group, val(group_score), file(scored_sv_vcf) from ch_scored_sv

// Emits the compressed VCF + index (three rescoring channels), the compressed VCF
// alone (two channels), the INFO record, and the versions file.
output:
set group, type, file("${group_score}.sv.scored.sorted.vcf.gz"), file("${group_score}.sv.scored.sorted.vcf.gz.tbi") into sv_rescore, sv_rescore_ma, sv_rescore_fa
set group, file("${group_score}.sv.scored.sorted.vcf.gz") into svvcf_bed, svvcf_pod
set group, file("${group}_sv.INFO") into sv_INFO
set group, file("*versions.yml") into ch_score_sv_bgzip_versions

// sort -> bgzip (forced overwrite) -> tabix (forced overwrite) -> INFO line -> versions file
script:
"""
bcftools sort -O v -o ${group_score}.sv.scored.sorted.vcf ${scored_sv_vcf}
bgzip -@ ${task.cpus} ${group_score}.sv.scored.sorted.vcf -f
tabix ${group_score}.sv.scored.sorted.vcf.gz -f
echo "SV\t$type\t${params.accessdir}/vcf/${group_score}.sv.scored.sorted.vcf.gz" > ${group}_sv.INFO

${bgzip_score_sv_version(task)}
"""
// Stub: create empty placeholders for every declared output file.
stub:
"""
touch "${group_score}.sv.scored.sorted.vcf.gz"
touch "${group_score}.sv.scored.sorted.vcf.gz.tbi"
touch "${group}_sv.INFO"

${bgzip_score_sv_version(task)}
"""
}
def bgzip_score_sv_version(task) {
"""
cat <<-END_VERSIONS > ${task.process}_versions.yml
${task.process}:
tabix: \$(echo \$(tabix --version 2>&1) | sed 's/^.*(htslib) // ; s/ Copyright.*//')
bcftools: \$(echo \$(bcftools --version 2>&1) | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
END_VERSIONS
"""
Expand Down