Install the nf-core sentieon/dedup module.

Replaces the local SENTIEON_LOCUSCOLLECTOR and SENTIEON_DEDUP processes with
the nf-core SENTIEON_DEDUP module, which runs LocusCollector and Dedup in a
single task.

Review notes on this patch:
  * conf/modules/align_sentieon.config: "--rmdup" is moved from ext.args to
    ext.args4. The nf-core module places $args on the LocusCollector driver
    call and $args4 after "--algo Dedup", so ext.args would hand the flag to
    the wrong command.
  * conf/modules/align_sentieon.config: "$params.rmdup" is corrected to
    "params.rmdup" ("$params" is a different identifier from "params").
  * conf/modules/align_sentieon.config: ext.suffix is set to ".bam". The
    module defaults to ".cram", while ALIGN_SENTIEON consumes
    SENTIEON_DEDUP.out.bam.
  * NOTE(review): the post-image blob id on the "index" line of the config
    section was not recomputed after the change above.
  * NOTE(review): the stub of modules/nf-core/sentieon/dedup/main.nf creates
    neither a *.bai nor a *.metrics.multiqc.tsv file although both outputs are
    declared without "optional: true". The file is left untouched here because
    it is vendored at the git_sha recorded in modules.json; the fix belongs
    upstream.
  * NOTE(review): indentation and column alignment were reconstructed from a
    whitespace-collapsed copy of this patch; verify against the work tree.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4975544f..487d77c1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Installed the nf-core version of the sentieon/bwamem module [#398](https://github.com/nf-core/raredisease/pull/398)
 - Installed the nf-core version of the sentieon/readwriter module [#399](https://github.com/nf-core/raredisease/pull/399)
 - Installed the nf-core version of the sentieon/datametrics module [#400](https://github.com/nf-core/raredisease/pull/400)
+- Installed the nf-core version of the sentieon/dedup module. The dedup module also contains a call to Sentieon's LocusCollector [#401](https://github.com/nf-core/raredisease/pull/401)
 
 ### `Fixed`
 
diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config
index 5d89f69c..e3dd1cc2 100644
--- a/conf/modules/align_sentieon.config
+++ b/conf/modules/align_sentieon.config
@@ -34,10 +34,7 @@ process {
         ext.prefix = { "${meta.id}_merged" }
     }
 
-    withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_LOCUSCOLLECTOR' {
-        ext.prefix = { "${meta.id}_locuscollector" }
-    }
-
     withName: '.*ALIGN:ALIGN_SENTIEON:SENTIEON_DEDUP' {
-        ext.args = { $params.rmdup ? "--rmdup" : '' }
+        ext.args4 = { params.rmdup ? "--rmdup" : '' }
+        ext.suffix = ".bam"
         ext.prefix = { "${meta.id}_dedup" }
diff --git a/modules.json b/modules.json
index 2974defc..9e427d30 100644
--- a/modules.json
+++ b/modules.json
@@ -355,6 +355,11 @@
                         "git_sha": "127edadc279e19da093fdd513926c6cdee82c306",
                         "installed_by": ["modules"]
                     },
+                    "sentieon/dedup": {
+                        "branch": "master",
+                        "git_sha": "915a0b16ba3e40ef59e7b44843b3118e17a9c906",
+                        "installed_by": ["modules"]
+                    },
                     "sentieon/readwriter": {
                         "branch": "master",
                         "git_sha": "b28e4dde755117e8dab5d6e85e292f145b8b53c3",
diff --git a/modules/local/sentieon/dedup.nf b/modules/local/sentieon/dedup.nf
deleted file mode 100644
index bb738985..00000000
--- a/modules/local/sentieon/dedup.nf
+++ /dev/null
@@ -1,60 +0,0 @@
-process SENTIEON_DEDUP {
-    tag "$meta.id"
-    label 'process_high'
-    label 'sentieon'
-
-    secret 'SENTIEON_LICENSE_BASE64'
-
-    input:
-    tuple val(meta), path(bam), path(bai), path(score), path(score_idx)
-    tuple val(meta2), path(fasta)
-    tuple val(meta3), path(fai)
-
-    output:
-    tuple val(meta), path('*.bam')        , emit: bam
-    tuple val(meta), path('*.bam.bai')    , emit: bai
-    tuple val(meta), path('*_metrics.txt'), emit: metrics_dedup
-    path "versions.yml"                   , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    def input = bam.sort().collect{"-i $it"}.join(' ')
-    """
-    if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then
-        echo "Initializing SENTIEON_LICENSE env variable"
-        source sentieon_init.sh SENTIEON_LICENSE_BASE64
-    fi
-
-    sentieon \\
-        driver \\
-        -t $task.cpus \\
-        $input \\
-        $args \\
-        --algo Dedup \\
-        --score_info $score \\
-        --metrics ${prefix}_metrics.txt \\
-        ${prefix}.bam
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
-    """
-    touch ${prefix}.bam
-    touch ${prefix}.bam.bai
-    touch ${prefix}_metrics.txt
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
-    END_VERSIONS
-    """
-}
diff --git a/modules/local/sentieon/locuscollector.nf b/modules/local/sentieon/locuscollector.nf
deleted file mode 100644
index 9335b0ec..00000000
--- a/modules/local/sentieon/locuscollector.nf
+++ /dev/null
@@ -1,52 +0,0 @@
-process SENTIEON_LOCUSCOLLECTOR {
-    tag "$meta.id"
-    label 'process_high'
-    label 'sentieon'
-
-    secret 'SENTIEON_LICENSE_BASE64'
-
-    input:
-    tuple val(meta), path(bam), path(bai)
-
-    output:
-    tuple val(meta), path('*txt.gz')    , emit: score    , optional: true
-    tuple val(meta), path('*txt.gz.tbi'), emit: score_idx, optional: true
-    path "versions.yml"                 , emit: versions
-
-    when:
-    task.ext.when == null || task.ext.when
-
-    script:
-    def input = bam.sort().collect{"-i $it"}.join(' ')
-    def prefix = task.ext.prefix ? "${task.ext.prefix}.txt.gz" : "${meta.id}.txt.gz"
-    """
-    if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then
-        echo "Initializing SENTIEON_LICENSE env variable"
-        source sentieon_init.sh SENTIEON_LICENSE_BASE64
-    fi
-
-    sentieon \\
-        driver \\
-        -t $task.cpus \\
-        $input \\
-        --algo LocusCollector \\
-        --fun score_info $prefix
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
-    END_VERSIONS
-    """
-
-    stub:
-    def prefix = task.ext.prefix ? "${task.ext.prefix}.txt.gz" : "${meta.id}.txt.gz"
-    """
-    touch ${prefix}.txt.gz
-    touch ${prefix}.txt.gz.tbi
-
-    cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
-    END_VERSIONS
-    """
-}
diff --git a/modules/nf-core/sentieon/dedup/main.nf b/modules/nf-core/sentieon/dedup/main.nf
new file mode 100644
index 00000000..c83d5e55
--- /dev/null
+++ b/modules/nf-core/sentieon/dedup/main.nf
@@ -0,0 +1,86 @@
+process SENTIEON_DEDUP {
+    tag "$meta.id"
+    label 'process_medium'
+    label 'sentieon'
+
+    secret 'SENTIEON_LICENSE_BASE64'
+
+    container 'nf-core/sentieon:202112.06'
+
+    input:
+    tuple val(meta), path(bam), path(bai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fasta_fai)
+
+    output:
+    tuple val(meta), path("*.cram")               , emit: cram, optional: true
+    tuple val(meta), path("*.crai")               , emit: crai, optional: true
+    tuple val(meta), path("*.bam")                , emit: bam , optional: true
+    tuple val(meta), path("*.bai")                , emit: bai
+    tuple val(meta), path("*.score")              , emit: score
+    tuple val(meta), path("*.metrics")            , emit: metrics
+    tuple val(meta), path("*.metrics.multiqc.tsv"), emit: metrics_multiqc_tsv
+    path "versions.yml"                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Exit if running this module with -profile conda / -profile mamba
+    if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+        error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead."
+    }
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
+    def args3 = task.ext.args3 ?: ''
+    def args4 = task.ext.args4 ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def suffix = task.ext.suffix ?: ".cram" // The suffix should be either ".cram" or ".bam".
+    def metrics = task.ext.metrics ?: "${prefix}${suffix}.metrics"
+    def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: ''
+    def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: ''
+    def input_list = bam.collect{"-i $it"}.join(' ')
+
+    """
+    if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url.
+        export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d)
+    else # Localhost license file
+        # The license file is stored as a nextflow variable like, for instance, this:
+        # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0)
+        export SENTIEON_LICENSE=\$(mktemp)
+        echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE
+    fi
+
+    if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then
+        # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license.
+        export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d)
+        export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d)
+        echo "Decoded and exported Sentieon test-license system environment variables"
+    fi
+
+    sentieon driver $args $input_list -r ${fasta} --algo LocusCollector $args2 --fun score_info ${prefix}.score
+    sentieon driver $args3 -t $task.cpus $input_list -r ${fasta} --algo Dedup $args4 --score_info ${prefix}.score --metrics ${metrics} ${prefix}${suffix}
+    # This following tsv-file is produced in order to get a proper tsv-file with Dedup-metrics for importing in MultiQC as "custom content".
+    # It should be removed once MultiQC has a module for displaying Dedup-metrics.
+    head -3 ${metrics} > ${metrics}.multiqc.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.cram
+    touch ${prefix}.cram.crai
+    touch ${prefix}.metrics
+    touch ${prefix}.score
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/sentieon/dedup/meta.yml b/modules/nf-core/sentieon/dedup/meta.yml
new file mode 100644
index 00000000..ec0565d9
--- /dev/null
+++ b/modules/nf-core/sentieon/dedup/meta.yml
@@ -0,0 +1,88 @@
+name: sentieon_dedup
+description: Runs the sentieon tool LocusCollector followed by Dedup. LocusCollector collects read information that is used by Dedup which in turn marks or removes duplicate reads.
+keywords:
+  - mem
+  - dedup
+  - map
+  - bam
+  - cram
+  - sentieon
+tools:
+  - sentieon:
+      description: |
+        Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads.
+        Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system.
+      homepage: https://www.sentieon.com/
+      documentation: https://www.sentieon.com/
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file.
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: BAI file
+      pattern: "*.bai"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - fasta:
+      type: file
+      description: Genome fasta file
+      pattern: "*.{fa,fasta}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - fasta_fai:
+      type: file
+      description: The index of the FASTA reference.
+      pattern: "*.fai"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing reference information.
+        e.g. [ id:'test', single_end:false ]
+  - cram:
+      type: file
+      description: CRAM file
+      pattern: "*.cram"
+  - crai:
+      type: file
+      description: CRAM index file
+      pattern: "*.crai"
+  - bam:
+      type: file
+      description: BAM file.
+      pattern: "*.bam"
+  - bai:
+      type: file
+      description: BAI file
+      pattern: "*.bai"
+  - score:
+      type: file
+      description: The score file indicates which reads LocusCollector finds are likely duplicates.
+      pattern: "*.score"
+  - metrics:
+      type: file
+      description: Output file containing Dedup metrics incl. histogram data.
+      pattern: "*.metrics"
+  - metrics_multiqc_tsv:
+      type: file
+      description: Output tsv-file containing Dedup metrics excl. histogram data.
+      pattern: "*.metrics.multiqc.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@asp8200"
diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf
index 8cbc122b..b7387a44 100644
--- a/subworkflows/local/alignment/align_sentieon.nf
+++ b/subworkflows/local/alignment/align_sentieon.nf
@@ -4,8 +4,7 @@
 
 include { SENTIEON_BWAMEM         } from '../../../modules/nf-core/sentieon/bwamem/main'
 include { SENTIEON_DATAMETRICS    } from '../../../modules/nf-core/sentieon/datametrics/main'
-include { SENTIEON_LOCUSCOLLECTOR } from '../../../modules/local/sentieon/locuscollector'
-include { SENTIEON_DEDUP          } from '../../../modules/local/sentieon/dedup'
+include { SENTIEON_DEDUP          } from '../../../modules/nf-core/sentieon/dedup/main'
 include { SENTIEON_BQSR           } from '../../../modules/local/sentieon/bqsr'
 include { SENTIEON_READWRITER     } from '../../../modules/nf-core/sentieon/readwriter/main'
 workflow ALIGN_SENTIEON {
@@ -45,14 +44,7 @@ workflow ALIGN_SENTIEON {
 
         SENTIEON_DATAMETRICS (ch_bam_bai, ch_genome_fasta, ch_genome_fai )
 
-        SENTIEON_LOCUSCOLLECTOR ( ch_bam_bai )
-
-        ch_bam_bai
-            .join(SENTIEON_LOCUSCOLLECTOR.out.score, failOnMismatch:true, failOnDuplicate:true)
-            .join(SENTIEON_LOCUSCOLLECTOR.out.score_idx, failOnMismatch:true, failOnDuplicate:true)
-            .set { ch_bam_bai_score }
-
-        SENTIEON_DEDUP ( ch_bam_bai_score, ch_genome_fasta, ch_genome_fai )
+        SENTIEON_DEDUP ( ch_bam_bai, ch_genome_fasta, ch_genome_fai )
 
         if (params.variant_caller == "sentieon") {
             SENTIEON_DEDUP.out.bam
@@ -67,7 +59,6 @@ workflow ALIGN_SENTIEON {
 
         ch_versions = ch_versions.mix(SENTIEON_BWAMEM.out.versions.first())
         ch_versions = ch_versions.mix(SENTIEON_DATAMETRICS.out.versions.first())
-        ch_versions = ch_versions.mix(SENTIEON_LOCUSCOLLECTOR.out.versions.first())
         ch_versions = ch_versions.mix(SENTIEON_DEDUP.out.versions.first())
 
     emit: