diff --git a/conf/modules/align_sentieon.config b/conf/modules/align_sentieon.config index a882a017..5d89f69c 100644 --- a/conf/modules/align_sentieon.config +++ b/conf/modules/align_sentieon.config @@ -16,6 +16,7 @@ // process { + withName: '.*ALIGN_SENTIEON:.*' { ext.when = params.aligner.equals("sentieon") } diff --git a/conf/test.config b/conf/test.config index 8065aa8a..9093bfb6 100644 --- a/conf/test.config +++ b/conf/test.config @@ -50,3 +50,10 @@ params { vep_filters = "https://raw.githubusercontent.com/nf-core/test-datasets/raredisease/reference/hgnc.txt" vep_cache_version = 107 } + +process { + withLabel: 'sentieon' { + ext.sentieon_auth_mech_base64 = secrets.SENTIEON_AUTH_MECH_BASE64 + ext.sentieon_auth_data_base64 = secrets.SENTIEON_AUTH_DATA_BASE64 + } +} diff --git a/modules.json b/modules.json index 0b3f5084..106948ad 100644 --- a/modules.json +++ b/modules.json @@ -345,6 +345,11 @@ "git_sha": "49af8ed458e28729e483bc96e5a57c28163b9ea0", "installed_by": ["modules"] }, + "sentieon/bwamem": { + "branch": "master", + "git_sha": "b9172e8c26a3db5009f7872654c44587e254f094", + "installed_by": ["modules"] + }, "smncopynumbercaller": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/sentieon/bwamem.nf b/modules/local/sentieon/bwamem.nf deleted file mode 100644 index 60ca36d6..00000000 --- a/modules/local/sentieon/bwamem.nf +++ /dev/null @@ -1,68 +0,0 @@ -process SENTIEON_BWAMEM { - tag "$meta.id" - label 'process_high' - label 'sentieon' - - secret 'SENTIEON_LICENSE_BASE64' - - input: - tuple val(meta), path(reads) - tuple val(meta2), path(fasta) - tuple val(meta3), path(fai) - tuple val(meta4), path(index) - - output: - tuple val(meta), path('*.bam'), emit: bam - tuple val(meta), path('*.bai'), emit: bai - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` - - if [ \${SENTIEON_LICENSE_BASE64:-"unset"} != "unset" ]; then - echo "Initializing SENTIEON_LICENSE env variable" - source sentieon_init.sh SENTIEON_LICENSE_BASE64 - fi - - sentieon bwa mem \\ - -t $task.cpus \\ - \$INDEX \\ - $reads \\ - $args \\ - | sentieon \\ - util \\ - sort \\ - -r $fasta \\ - -o ${prefix}.bam \\ - -t $task.cpus \\ - $args2 \\ - --sam2bam \\ - -i - - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.bam - touch ${prefix}.bai - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") - bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/sentieon/bwamem/main.nf b/modules/nf-core/sentieon/bwamem/main.nf new file mode 100644 index 00000000..aeca2825 --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/main.nf @@ -0,0 +1,78 @@ +process SENTIEON_BWAMEM { + tag "$meta.id" + label 'process_high' + label 'sentieon' + + secret 'SENTIEON_LICENSE_BASE64' + + container 'nf-core/sentieon:202112.06' + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + path(fasta) + path(fasta_fai) + + output: + tuple val(meta), path("*.bam"), path("*.bai"), emit: bam_and_bai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "Sentieon modules do not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sentieon_auth_mech_base64 = task.ext.sentieon_auth_mech_base64 ?: '' + def sentieon_auth_data_base64 = task.ext.sentieon_auth_data_base64 ?: '' + + """ + if [ "\${#SENTIEON_LICENSE_BASE64}" -lt "1500" ]; then # If the string SENTIEON_LICENSE_BASE64 is short, then it is an encrypted url. + export SENTIEON_LICENSE=\$(echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d) + else # Localhost license file + # The license file is stored as a nextflow variable like, for instance, this: + # nextflow secrets set SENTIEON_LICENSE_BASE64 \$(cat | base64 -w 0) + export SENTIEON_LICENSE=\$(mktemp) + echo -e "\$SENTIEON_LICENSE_BASE64" | base64 -d > \$SENTIEON_LICENSE + fi + + if [ ${sentieon_auth_mech_base64} ] && [ ${sentieon_auth_data_base64} ]; then + # If sentieon_auth_mech_base64 and sentieon_auth_data_base64 are non-empty strings, then Sentieon is mostly likely being run with some test-license. + export SENTIEON_AUTH_MECH=\$(echo -n "${sentieon_auth_mech_base64}" | base64 -d) + export SENTIEON_AUTH_DATA=\$(echo -n "${sentieon_auth_data_base64}" | base64 -d) + echo "Decoded and exported Sentieon test-license system environment variables" + fi + + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + + sentieon bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | sentieon util sort -r $fasta -t $task.cpus -o ${prefix}.bam --sam2bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sentieon: \$(echo \$(sentieon driver --version 2>&1) | sed -e "s/sentieon-genomics-//g") + bwa: \$(echo \$(sentieon bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/sentieon/bwamem/meta.yml b/modules/nf-core/sentieon/bwamem/meta.yml new file mode 100644 index 00000000..3d1546c5 --- /dev/null +++ b/modules/nf-core/sentieon/bwamem/meta.yml @@ -0,0 +1,63 @@ +name: sentieon_bwamem +description: Performs fastq alignment to a fasta reference using Sentieon's BWA MEM +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sentieon +tools: + - sentieon: + description: | + Sentieon® provides complete solutions for secondary DNA/RNA analysis for a variety of sequencing platforms, including short and long reads. + Our software improves upon BWA, STAR, Minimap2, GATK, HaplotypeCaller, Mutect, and Mutect2 based pipelines and is deployable on any generic-CPU-based computing system. + homepage: https://www.sentieon.com/ + documentation: https://www.sentieon.com/ +input: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Genome fastq files (single-end or paired-end) + - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - fasta: + type: file + description: Genome fasta file + pattern: "*.{fa,fasta}" + - fasta_fai: + type: file + description: The index of the FASTA reference. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file. + pattern: "*.bam" + - bai: + type: file + description: BAI file + pattern: "*.bai" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@asp8200" diff --git a/subworkflows/local/alignment/align_sentieon.nf b/subworkflows/local/alignment/align_sentieon.nf index e4094edf..796aeaa4 100644 --- a/subworkflows/local/alignment/align_sentieon.nf +++ b/subworkflows/local/alignment/align_sentieon.nf @@ -2,7 +2,7 @@ // A subworkflow to annotate structural variants. // -include { SENTIEON_BWAMEM } from '../../../modules/local/sentieon/bwamem' +include { SENTIEON_BWAMEM } from '../../../modules/nf-core/sentieon/bwamem/main' include { SENTIEON_DATAMETRICS } from '../../../modules/local/sentieon/datametrics' include { SENTIEON_LOCUSCOLLECTOR } from '../../../modules/local/sentieon/locuscollector' include { SENTIEON_DEDUP } from '../../../modules/local/sentieon/dedup' @@ -25,11 +25,10 @@ workflow ALIGN_SENTIEON { ch_bqsr_bai = Channel.empty() ch_bqsr_csv = Channel.empty() - SENTIEON_BWAMEM ( ch_reads_input, ch_genome_fasta, ch_genome_fai, ch_bwa_index ) + SENTIEON_BWAMEM ( ch_reads_input, ch_bwa_index, ch_genome_fasta.map{ meta, fasta -> fasta }, ch_genome_fai.map{ meta, fai -> fai }) SENTIEON_BWAMEM.out - .bam - .join(SENTIEON_BWAMEM.out.bai, failOnMismatch:true, failOnDuplicate:true) + .bam_and_bai .map{ meta, bam, bai -> new_id = meta.id.split('_')[0] new_meta = meta + [id:new_id, read_group:"\'@RG\\tID:" + new_id + "\\tPL:" + val_platform + "\\tSM:" + new_id + "\'"] diff --git a/subworkflows/local/mitochondria/align_and_call_MT.nf b/subworkflows/local/mitochondria/align_and_call_MT.nf index be060a7d..fb00739a 100644 --- a/subworkflows/local/mitochondria/align_and_call_MT.nf +++ b/subworkflows/local/mitochondria/align_and_call_MT.nf @@ -2,7 +2,7 @@ // Align and call MT // -include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/local/sentieon/bwamem' +include { SENTIEON_BWAMEM as SENTIEON_BWAMEM_MT } from '../../../modules/nf-core/sentieon/bwamem/main' include { BWAMEM2_MEM as BWAMEM2_MEM_MT } from '../../../modules/nf-core/bwamem2/mem/main' include { GATK4_MERGEBAMALIGNMENT as GATK4_MERGEBAMALIGNMENT_MT } from '../../../modules/nf-core/gatk4/mergebamalignment/main' include { PICARD_ADDORREPLACEREADGROUPS as PICARD_ADDORREPLACEREADGROUPS_MT } from '../../../modules/nf-core/picard/addorreplacereadgroups/main' @@ -32,10 +32,10 @@ workflow ALIGN_AND_CALL_MT { BWAMEM2_MEM_MT (ch_fastq, ch_bwamem2index, true) - SENTIEON_BWAMEM_MT ( ch_fastq, ch_fasta, ch_fai, ch_bwaindex ) + SENTIEON_BWAMEM_MT ( ch_fastq, ch_bwaindex, ch_fasta.map{ meta, fasta -> fasta }, ch_fai.map{ meta, fai -> fai } ) Channel.empty() - .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam) + .mix(BWAMEM2_MEM_MT.out.bam, SENTIEON_BWAMEM_MT.out.bam_and_bai.map{ meta, bam, bai -> [meta, bam] }) .join(ch_ubam, failOnMismatch:true, failOnDuplicate:true) .set {ch_bam_ubam}