From 2640c83167e15a712e497b81fb835b8ccc4c63b8 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 15 Aug 2023 12:02:54 +0000 Subject: [PATCH 1/7] Replace local version of SENTIEON_WGSMETRICS with nf-core version --- modules.json | 5 ++ modules/local/sentieon/wgsmetricsalgo.nf | 57 -------------- modules/nf-core/sentieon/wgsmetrics/main.nf | 81 ++++++++++++++++++++ modules/nf-core/sentieon/wgsmetrics/meta.yml | 73 ++++++++++++++++++ subworkflows/local/qc_bam.nf | 8 +- 5 files changed, 163 insertions(+), 61 deletions(-) delete mode 100644 modules/local/sentieon/wgsmetricsalgo.nf create mode 100644 modules/nf-core/sentieon/wgsmetrics/main.nf create mode 100644 modules/nf-core/sentieon/wgsmetrics/meta.yml diff --git a/modules.json b/modules.json index b4e2c848..5e1764b9 100644 --- a/modules.json +++ b/modules.json @@ -370,6 +370,11 @@ "git_sha": "b28e4dde755117e8dab5d6e85e292f145b8b53c3", "installed_by": ["modules"] }, + "sentieon/wgsmetrics": { + "branch": "master", + "git_sha": "b1e9314b919f498e116bfc1417b7185185e99261", + "installed_by": ["modules"] + }, "smncopynumbercaller": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/sentieon/wgsmetricsalgo.nf b/modules/local/sentieon/wgsmetricsalgo.nf deleted file mode 100644 index 3663947d..00000000 --- a/modules/local/sentieon/wgsmetricsalgo.nf +++ /dev/null @@ -1,57 +0,0 @@ -process SENTIEON_WGSMETRICSALGO { - tag "$meta.id" - label 'process_medium' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(bam), path(bai) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - path intervals_list - - output: - tuple val(meta), path('*.txt'), emit: wgs_metrics - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def input = bam.sort().collect{"-i $it"}.join(' ') - def prefix = task.ext.prefix ?: "${meta.id}" - def interval = intervals_list ? 
"--interval ${intervals_list}" : "" - """ - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon \\ - driver \\ - -t $task.cpus \\ - -r $fasta \\ - $input \\ - $interval \\ - $args \\ - --algo WgsMetricsAlgo ${prefix}.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}_wgs_metrics.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - END_VERSIONS - """ -} diff --git a/modules/nf-core/sentieon/wgsmetrics/main.nf b/modules/nf-core/sentieon/wgsmetrics/main.nf new file mode 100644 index 00000000..fd7fdae5 --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/main.nf @@ -0,0 +1,81 @@ +process SENTIEON_WGSMETRICS { + tag "$meta.id" + label 'process_medium' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(intervals_list) + + + output: + tuple val(meta), path('*.txt'), emit: wgs_metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def input = bam.sort().collect{"-i $it"}.join(' ') + def prefix = task.ext.prefix ?: "${meta.id}" + def interval = intervals_list ? 
"--interval ${intervals_list}" : "" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license. + export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + sentieon \\ + driver \\ + -t $task.cpus \\ + -r $fasta \\ + $input \\ + $interval \\ + $args \\ + --algo WgsMetricsAlgo ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/wgsmetrics/meta.yml b/modules/nf-core/sentieon/wgsmetrics/meta.yml new file mode 100644 index 00000000..09fb9eca --- /dev/null +++ b/modules/nf-core/sentieon/wgsmetrics/meta.yml @@ -0,0 +1,73 @@ +name: "sentieon_wgsmetrics" +description: Collects whole genome quality metrics from a bam file +keywords: + - metrics + - bam + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. `[ id:'test' ]` + - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. 
`[ id:'test' ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index of th sorted BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fai: + type: file + description: Index of the genome fasta file + pattern: "*.fai" + - interval_list: + type: file + description: bed or interval_list file containing interval in the reference that will be used in the analysis + pattern: "*.{bed,interval_list}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - wgs_metrics: + type: file + description: File containing the information about mean base quality score for each sequencing cycle + pattern: "*.txt" + +authors: + - "@ramprasadn" diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index 06b55422..d593bac9 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -10,8 +10,8 @@ include { MOSDEPTH } from '../../m include { UCSC_WIGTOBIGWIG } from '../../modules/nf-core/ucsc/wigtobigwig/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../modules/nf-core/picard/collectwgsmetrics/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main' -include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS } from '../../modules/local/sentieon/wgsmetricsalgo' -include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS_Y } from '../../modules/local/sentieon/wgsmetricsalgo' +include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' +include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' workflow 
QC_BAM { @@ -52,8 +52,8 @@ workflow QC_BAM { PICARD_COLLECTWGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs ) PICARD_COLLECTWGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y ) - SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs ) - SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y ) + SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, [[:], ch_intervals_wgs] ) + SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, [[:], ch_intervals_y] ) ch_cov = Channel.empty().mix(PICARD_COLLECTWGSMETRICS.out.metrics, SENTIEON_WGSMETRICS.out.wgs_metrics) ch_cov_y = Channel.empty().mix(PICARD_COLLECTWGSMETRICS_Y.out.metrics, SENTIEON_WGSMETRICS_Y.out.wgs_metrics) From 50cd408ce015a7ad2088dd655c8f87382dbec154 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 15 Aug 2023 12:07:22 +0000 Subject: [PATCH 2/7] SENTIEON_WGSMETRICSALGO replaced by SENTIEON_WGSMETRICS --- subworkflows/local/qc_bam.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index d593bac9..9519c2a1 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -10,8 +10,8 @@ include { MOSDEPTH } from '../../m include { UCSC_WIGTOBIGWIG } from '../../modules/nf-core/ucsc/wigtobigwig/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../modules/nf-core/picard/collectwgsmetrics/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main' -include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' -include { SENTIEON_WGSMETRICSALGO as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' +include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' +include { 
SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' workflow QC_BAM { From d2e74922522b7790c36f4fdabb0261c9065dbed4 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 15 Aug 2023 12:19:00 +0000 Subject: [PATCH 3/7] Trying to fix input-channel with intervals for sentieon-wgsmetrics module --- subworkflows/local/qc_bam.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index 9519c2a1..8308c221 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -52,8 +52,8 @@ workflow QC_BAM { PICARD_COLLECTWGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs ) PICARD_COLLECTWGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y ) - SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, [[:], ch_intervals_wgs] ) - SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, [[:], ch_intervals_y] ) + SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs.map{ interval -> [[:] interval]} ) + SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y.map{ interval -> [[:] interval]} ) ch_cov = Channel.empty().mix(PICARD_COLLECTWGSMETRICS.out.metrics, SENTIEON_WGSMETRICS.out.wgs_metrics) ch_cov_y = Channel.empty().mix(PICARD_COLLECTWGSMETRICS_Y.out.metrics, SENTIEON_WGSMETRICS_Y.out.wgs_metrics) From d8094c27bbec07c576946ef8606f9c32dac145c4 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 15 Aug 2023 12:21:36 +0000 Subject: [PATCH 4/7] Fixing typo --- subworkflows/local/qc_bam.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index 8308c221..a86cd7ee 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -52,8 +52,8 @@ workflow QC_BAM { PICARD_COLLECTWGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, 
ch_intervals_wgs ) PICARD_COLLECTWGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y ) - SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs.map{ interval -> [[:] interval]} ) - SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y.map{ interval -> [[:] interval]} ) + SENTIEON_WGSMETRICS ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_wgs.map{ interval -> [[:], interval]} ) + SENTIEON_WGSMETRICS_Y ( ch_bam_bai, ch_genome_fasta, ch_genome_fai, ch_intervals_y.map{ interval -> [[:], interval]} ) ch_cov = Channel.empty().mix(PICARD_COLLECTWGSMETRICS.out.metrics, SENTIEON_WGSMETRICS.out.wgs_metrics) ch_cov_y = Channel.empty().mix(PICARD_COLLECTWGSMETRICS_Y.out.metrics, SENTIEON_WGSMETRICS_Y.out.wgs_metrics) From 3c1291a606834627e8c3845c64043fd108db77a4 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 15 Aug 2023 12:59:58 +0000 Subject: [PATCH 5/7] Update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31781c81..2b548c69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Installed the nf-core version of the sentieon/dedup module. The dedup module also contains a call to Sentieon's LocusCollector [#401](https://github.com/nf-core/raredisease/pull/401) - Removing Sentieon-based BQSR. 
Recent Illumina sequencers tend to provide well-calibrated BQs, so BQSR may not provide much benefit [#402](https://github.com/nf-core/raredisease/pull/402) - Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403) +- Installed the nf-core version of the sentieon/wgsmetrics module [#404](https://github.com/nf-core/raredisease/pull/404) + ### `Fixed` From a9993518db2f98ebe3bccacc23c4b26731303202 Mon Sep 17 00:00:00 2001 From: asp8200 Date: Tue, 15 Aug 2023 13:07:21 +0000 Subject: [PATCH 6/7] prettier --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b548c69..244eda8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,7 +21,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Installed the nf-core version of the sentieon/dnamodelapply module [#403](https://github.com/nf-core/raredisease/pull/403) - Installed the nf-core version of the sentieon/wgsmetrics module [#404](https://github.com/nf-core/raredisease/pull/404) - ### `Fixed` - Invalid GATK4 container which caused incorrect singularity downloads with nf-core download [nf-core/modules #3668](https://github.com/nf-core/modules/issues/3668) From cc653d32fab318b59499c0c8750ada8f509f2730 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen <37172585+asp8200@users.noreply.github.com> Date: Tue, 15 Aug 2023 15:11:44 +0200 Subject: [PATCH 7/7] Update subworkflows/local/qc_bam.nf Co-authored-by: Ramprasad Neethiraj <20065894+ramprasadn@users.noreply.github.com> --- subworkflows/local/qc_bam.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/qc_bam.nf b/subworkflows/local/qc_bam.nf index a86cd7ee..9331523e 100644 --- a/subworkflows/local/qc_bam.nf +++ b/subworkflows/local/qc_bam.nf @@ -10,7 +10,7 @@ include { MOSDEPTH } from '../../m include { UCSC_WIGTOBIGWIG } from '../../modules/nf-core/ucsc/wigtobigwig/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../modules/nf-core/picard/collectwgsmetrics/main' include {
PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS } from '../../modules/nf-core/picard/collectwgsmetrics/main' include { PICARD_COLLECTWGSMETRICS as PICARD_COLLECTWGSMETRICS_Y } from '../../modules/nf-core/picard/collectwgsmetrics/main' -include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' +include { SENTIEON_WGSMETRICS } from '../../modules/nf-core/sentieon/wgsmetrics/main' include { SENTIEON_WGSMETRICS as SENTIEON_WGSMETRICS_Y } from '../../modules/nf-core/sentieon/wgsmetrics/main' workflow QC_BAM {