diff --git a/CHANGELOG.md b/CHANGELOG.md index 244eda8d..00d4752e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Removing Sentieon-based BQSR. Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit [#402](https://github.com/nf-core/raredisease/pull/402) - Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403) - Installed the nf-core version of the sentieon/wgsmetricsalgo module [#404](https://github.com/nf-core/raredisease/pull/404) +- Installed the nf-core version of the sentieon/dnascope module [#406](https://github.com/nf-core/raredisease/pull/406) ### `Fixed` diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config index b39bad3c..3e002cb6 100644 --- a/conf/modules/align_sentieon.config +++ b/conf/modules/align_sentieon.config @@ -37,6 +37,7 @@ process { withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' { - ext.args = { $params.rmdup ? 
"--rmdup" : '' } + ext.args = { params.rmdup ? "--rmdup" : '' } ext.prefix = { "${meta.id}_dedup" } + ext.suffix = ".bam" publishDir = [ enabled: !params.save_mapped_as_cram, path: { "${params.outdir}/alignment" }, diff --git a/modules.json b/modules.json index 5e1764b9..dcc0e9d9 100644 --- a/modules.json +++ b/modules.json @@ -365,6 +365,11 @@ "git_sha": "43ef68091a1188fd8dc4c03f9341b556803c7514", "installed_by": ["modules"] }, + "sentieon/dnascope": { + "branch": "master", + "git_sha": "127edadc279e19da093fdd513926c6cdee82c306", + "installed_by": ["modules"] + }, "sentieon/readwriter": { "branch": "master", "git_sha": "b28e4dde755117e8dab5d6e85e292f145b8b53c3", diff --git a/modules/local/sentieon/dnascope.nf b/modules/local/sentieon/dnascope.nf deleted file mode 100644 index d03fe2d4..00000000 --- a/modules/local/sentieon/dnascope.nf +++ /dev/null @@ -1,62 +0,0 @@ -process SENTIEON_DNASCOPE { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - input: - tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(known_dbsnp) - tuple val(meta5), path(known_dbsnp_tbi) - path call_interval - path ml_model - - output: - tuple val(meta), path("*.vcf.gz") , emit: vcf - tuple val(meta), path("*.vcf.gz.tbi") , emit: index - tuple val(meta), path("*.vcf.gz"), path("*.vcf.gz.tbi"), emit: vcf_index - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def interval = call_interval ? "--interval ${call_interval}" : '' - def dbsnp = known_dbsnp ? "-d ${known_dbsnp}" : '' - def model = ml_model ? 
"--model ${ml_model}" : '' - def prefix = task.ext.prefix ?: "${meta.id}" - - """ - sentieon driver \\ - -t $task.cpus \\ - -r $fasta \\ - $args \\ - -i $bam \\ - --algo DNAscope \\ - $dbsnp \\ - $interval \\ - $args2 \\ - $model \\ - ${prefix}.vcf.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/sentieon/dnascope/main.nf b/modules/nf-core/sentieon/dnascope/main.nf new file mode 100644 index 00000000..acc0ca7b --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/main.nf @@ -0,0 +1,91 @@ +process SENTIEON_DNASCOPE { + tag "$meta.id" + label 'process_high' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(dbsnp) + tuple val(meta5), path(dbsnp_tbi) + tuple val(meta6), path(call_interval) + tuple val(meta7), path(ml_model) + + output: + tuple val(meta), path("*.vcf.gz") , emit: vcf + tuple val(meta), path("*.vcf.gz.tbi") , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def interval = call_interval ? "--interval ${call_interval}" : '' + def dbsnp_str = dbsnp ? 
"-d ${dbsnp}" : '' + def model = ml_model ? "--model ${ml_model}" : '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license. + export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + sentieon driver \\ + -t $task.cpus \\ + -r $fasta \\ + $args \\ + $interval \\ + -i $bam \\ + --algo DNAscope \\ + $dbsnp_str \\ + $args2 \\ + $model \\ + ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + """ + touch ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/dnascope/meta.yml b/modules/nf-core/sentieon/dnascope/meta.yml new file mode 100644 index 00000000..82154b38 --- /dev/null +++ b/modules/nf-core/sentieon/dnascope/meta.yml @@ -0,0 +1,103 @@ +name: sentieon_dnascope +description: DNAscope algorithm performs an improved version of Haplotype variant calling. +keywords: + - dnascope + - sentieon + - variant_calling +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. [ id:'test', single_end:false ] + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - meta4: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - meta5: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - meta6: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - meta7: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test' ] + - bam: + type: file + description: BAM file. 
+ pattern: "*.bam" + - bai: + type: file + description: BAI file + pattern: "*.bai" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + - dbsnp: + type: file + description: Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz" + - dbsnp_tbi: + type: file + description: Index of the Single Nucleotide Polymorphism database (dbSNP) file + pattern: "*.vcf.gz.tbi" + - call_interval: + type: file + description: bed or interval_list file containing interval in the reference that will be used in the analysis + pattern: "*.{bed,interval_list}" + - ml_model: + type: file + description: machine learning model file + pattern: "*.model" + +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf.gz}" + - index: + type: file + description: Index of the VCF file + pattern: "*.vcf.gz.tbi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@ramprasadn" diff --git a/subworkflows/local/variant_calling/call_snv_sentieon.nf b/subworkflows/local/variant_calling/call_snv_sentieon.nf index bbcdbc19..f7a36daf 100644 --- a/subworkflows/local/variant_calling/call_snv_sentieon.nf +++ b/subworkflows/local/variant_calling/call_snv_sentieon.nf @@ -2,7 +2,7 @@ // A subworkflow to call SNVs by sentieon dnascope with a machine learning model. 
// -include { SENTIEON_DNASCOPE } from '../../../modules/local/sentieon/dnascope' +include { SENTIEON_DNASCOPE } from '../../../modules/nf-core/sentieon/dnascope/main' include { SENTIEON_DNAMODELAPPLY } from '../../../modules/nf-core/sentieon/dnamodelapply/main' include { BCFTOOLS_MERGE } from '../../../modules/nf-core/bcftools/merge/main' include { BCFTOOLS_NORM as SPLIT_MULTIALLELICS_SEN } from '../../../modules/nf-core/bcftools/norm/main' @@ -15,12 +15,12 @@ include { BCFTOOLS_FILTER as BCF_FILTER_TWO } from '../../../modules/nf-c workflow CALL_SNV_SENTIEON { take: ch_bam_bai // channel: [mandatory] [ val(meta), path(bam), path(bai) ] - ch_genome_fasta // channel: [mandatory] [ path(fasta) ] - ch_genome_fai // channel: [mandatory] [ path(fai) ] + ch_genome_fasta // channel: [mandatory] [ val(meta), path(fasta) ] + ch_genome_fai // channel: [mandatory] [ val(meta), path(fai) ] ch_dbsnp // channel: [mandatory] [ val(meta), path(vcf) ] ch_dbsnp_index // channel: [mandatory] [ val(meta), path(tbi) ] - ch_call_interval // channel: [mandatory] [ path(interval) ] - ch_ml_model // channel: [mandatory] [ path(model) ] + ch_call_interval // channel: [optional] [ val(meta), path(interval) ] + ch_ml_model // channel: [mandatory] [ val(meta), path(model) ] ch_case_info // channel: [mandatory] [ val(case_info) ] main: @@ -28,7 +28,10 @@ workflow CALL_SNV_SENTIEON { SENTIEON_DNASCOPE ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_dbsnp, ch_dbsnp_index, ch_call_interval, ch_ml_model ) - SENTIEON_DNAMODELAPPLY ( SENTIEON_DNASCOPE.out.vcf_index, ch_genome_fasta, ch_genome_fai, [ [:], ch_ml_model ] ) + // Pair each VCF with its index on the shared meta key; abort on unmatched or duplicate keys instead of silently dropping a sample. + ch_dnamodelapply_in = SENTIEON_DNASCOPE.out.vcf.join(SENTIEON_DNASCOPE.out.index, failOnMismatch: true, failOnDuplicate: true) + + SENTIEON_DNAMODELAPPLY ( ch_dnamodelapply_in, ch_genome_fasta, ch_genome_fai, ch_ml_model ) BCF_FILTER_ONE (SENTIEON_DNAMODELAPPLY.out.vcf ) diff --git a/workflows/raredisease.nf b/workflows/raredisease.nf index 0704b202..88a657d4 100644 --- a/workflows/raredisease.nf +++ 
b/workflows/raredisease.nf @@ -209,8 +209,8 @@ workflow RAREDISEASE { ch_cadd_header = Channel.fromPath("$projectDir/assets/cadd_to_vcf_header_-1.0-.txt", checkIfExists: true).collect() ch_cadd_resources = params.cadd_resources ? Channel.fromPath(params.cadd_resources).collect() : Channel.value([]) - ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).collect() - : Channel.value([]) + ch_call_interval = params.call_interval ? Channel.fromPath(params.call_interval).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.value([[:],[]]) ch_dbsnp_tbi = params.known_dbsnp_tbi ? Channel.fromPath(params.known_dbsnp_tbi).map {it -> [[id:it[0].simpleName], it]}.collect() : ch_references.known_dbsnp_tbi.ifEmpty([[],[]]) ch_gcnvcaller_model = params.gcnvcaller_model ? Channel.fromPath(params.gcnvcaller_model).splitCsv ( header:true ) @@ -234,8 +234,8 @@ workflow RAREDISEASE { : Channel.empty() ch_intervals_y = params.intervals_y ? Channel.fromPath(params.intervals_y).collect() : Channel.empty() - ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).collect() - : Channel.value([]) + ch_ml_model = params.variant_caller.equals("sentieon") ? Channel.fromPath(params.ml_model).map {it -> [[id:it[0].simpleName], it]}.collect() + : Channel.value([[:],[]]) ch_mt_intervals = ch_references.mt_intervals ch_mtshift_backchain = ch_references.mtshift_backchain ch_mtshift_bwaindex = ch_references.mtshift_bwa_index