diff --git a/CHANGELOG.md b/CHANGELOG.md index e7e1480a..31781c81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Installed the nf-core version of the sentieon/datametrics module [#400](https://github.com/nf-core/raredisease/pull/400) - Installed the nf-core version of the sentieon/dedup module. The dedup module also contains a call to Sentieon's LocusCollector [#401](https://github.com/nf-core/raredisease/pull/401) - Removing Sentieon-based BQSR. Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit [#402](https://github.com/nf-core/raredisease/pull/402) +- Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403) ### `Fixed` diff --git a/modules.json b/modules.json index 9e427d30..b4e2c848 100644 --- a/modules.json +++ b/modules.json @@ -360,6 +360,11 @@ "git_sha": "915a0b16ba3e40ef59e7b44843b3118e17a9c906", "installed_by": ["modules"] }, + "sentieon/dnamodelapply": { + "branch": "master", + "git_sha": "43ef68091a1188fd8dc4c03f9341b556803c7514", + "installed_by": ["modules"] + }, "sentieon/readwriter": { "branch": "master", "git_sha": "b28e4dde755117e8dab5d6e85e292f145b8b53c3", diff --git a/modules/local/sentieon/dnamodelapply.nf b/modules/local/sentieon/dnamodelapply.nf deleted file mode 100644 index 32582b2c..00000000 --- a/modules/local/sentieon/dnamodelapply.nf +++ /dev/null @@ -1,50 +0,0 @@ -process SENTIEON_DNAMODELAPPLY { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - input: - tuple val(meta), path(vcf), path(vcf_idx) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - path ml_model - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: index - tuple val(meta), path("*.vcf.gz"), path("*.vcf.gz.tbi"), emit: vcf_index - path "versions.yml" , emit: versions - - when: - 
task.ext.when == null || task.ext.when - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - sentieon driver \\ - -t $task.cpus \\ - -r $fasta \\ - --algo DNAModelApply \\ - --model $ml_model \\ - -v $vcf \\ - ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/sentieon/dnamodelapply/main.nf b/modules/nf-core/sentieon/dnamodelapply/main.nf new file mode 100644 index 00000000..3fe9a28f --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/main.nf @@ -0,0 +1,81 @@ +process SENTIEON_DNAMODELAPPLY { + tag "$meta.id" + label 'process_high' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(vcf), path(idx) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(ml_model) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat /path/to/sentieon_license_file.lic | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is most likely being run with some test-license. + export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + sentieon driver \\ + -t $task.cpus \\ + -r $fasta \\ + $args \\ + --algo DNAModelApply \\ + --model $ml_model \\ + -v $vcf \\ + ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dnamodelapply/meta.yml b/modules/nf-core/sentieon/dnamodelapply/meta.yml new file mode 100644 index 00000000..ec429bea --- /dev/null +++ b/modules/nf-core/sentieon/dnamodelapply/meta.yml @@ -0,0 +1,78 @@ +name: "sentieon_dnamodelapply" +description: modifies the input VCF file by adding the MLrejected FILTER to the variants +keywords: + - dnamodelapply + - vcf + - filter + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta4: + type: map + description: | + Groovy Map containing machine learning model information + e.g. 
`[ id:'test' ]` + - vcf: + type: file + description: INPUT VCF file + pattern: "*.{vcf,vcf.gz}" + - idx: + type: file + description: Index of the input VCF file + pattern: "*.{tbi}" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Output VCF file with the MLrejected FILTER added to the variants + pattern: "*.{vcf,vcf.gz}" + - index: + type: file + description: Index of the output VCF file + pattern: "*.{tbi}" + +authors: + - "@ramprasadn" diff --git a/subworkflows/local/variant_calling/call_snv_sentieon.nf b/subworkflows/local/variant_calling/call_snv_sentieon.nf index d1b8bbb2..bbcdbc19 100644 --- a/subworkflows/local/variant_calling/call_snv_sentieon.nf +++ b/subworkflows/local/variant_calling/call_snv_sentieon.nf @@ -3,7 +3,7 @@ // include { SENTIEON_DNASCOPE } from '../../../modules/local/sentieon/dnascope' -include { SENTIEON_DNAMODELAPPLY } from '../../../modules/local/sentieon/dnamodelapply' +include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_SEN } from '../../../modules/nf-core/bcftools/norm/main' include { BCFTOOLS_NORM as REMOVE_DUPLICATES_SEN } from '../../../modules/nf-core/bcftools/norm/main' @@ -28,7 +28,7 @@ workflow CALL_SNV_SENTIEON { SENTIEON_DNASCOPE ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_dbsnp, ch_dbsnp_index, ch_call_interval, ch_ml_model ) - SENTIEON_DNAMODELAPPLY ( SENTIEON_DNASCOPE.out.vcf_index, ch_genome_fasta, ch_genome_fai, ch_ml_model 
) + SENTIEON_DNAMODELAPPLY ( SENTIEON_DNASCOPE.out.vcf_index, ch_genome_fasta, ch_genome_fai, [ [:], ch_ml_model ] ) BCF_FILTER_ONE (SENTIEON_DNAMODELAPPLY.out.vcf )