diff --git a/README.md b/README.md index 4f4e6e0..5577161 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,8 @@ See the website for detailed information, documentation, and examples: - dependencies - project URLs 1. Write your nextflow workflow. + - Where possible, reuse existing modules and subworkflows from [CCBR/nf-modules](https://github.com/CCBR/nf-modules)[^3]. + Also consider contributing new modules & subworkflows to that repository! 1. Write your documentation in `docs/` and enable GitHub Pages. - In settings, go to General > Pages and select the `gh-pages` branch. mkdocs will build your site under the `gh-pages` branch, and GitHub Pages will make it available at `https://OWNER.github.io/TOOL_NAME`. @@ -119,3 +121,4 @@ If you plan to contribute your pipeline to nf-core, don't use this template -- i [^1]: nektool https://github.com/beardymcjohnface/nektool [^2]: instructions for nf-core pipelines https://nf-co.re/docs/contributing/tutorials/creating_with_nf_core +[^3]: See also our reusable modules and subworkflows for CCBR nextflow pipelines: https://github.com/CCBR/nf-modules diff --git a/main.nf b/main.nf index 97a9fa8..3c00970 100644 --- a/main.nf +++ b/main.nf @@ -17,6 +17,7 @@ input : ${params.input} .stripIndent() include { FASTQC } from "./modules/local/qc.nf" +include { BWA_MEM } from './modules/CCBR/bwa/mem' workflow.onComplete { if (!workflow.stubRun && !workflow.commandLine.contains('-preview')) { diff --git a/modules.json b/modules.json new file mode 100644 index 0000000..b7f8bb6 --- /dev/null +++ b/modules.json @@ -0,0 +1,17 @@ +{ + "name": "CCBR/TOOL_NAME", + "homePage": "https://github.com/CCBR/TOOL_NAME", + "repos": { + "https://github.com/CCBR/nf-modules": { + "modules": { + "CCBR": { + "bwa/mem": { + "branch": "main", + "git_sha": "490e4454c81e3fb852f7f964356f9981b6f1c439", + "installed_by": ["modules"] + } + } + } + } + } +} diff --git a/modules/CCBR/bwa/mem/main.nf b/modules/CCBR/bwa/mem/main.nf new file mode 100644 index 0000000..e9fa59f --- /dev/null +++ 
b/modules/CCBR/bwa/mem/main.nf
@@ -0,0 +1,76 @@
+/*
+ * BWA_MEM: align FASTQ reads to a BWA index with `bwa mem`, then sort the
+ * alignments and write a BAM plus its index with `samtools sort`.
+ *
+ * Inputs:
+ *   meta, fastq           - sample metadata map and the FASTQ file(s) to align;
+ *                           meta.id tags the task and is the default output prefix.
+ *   meta_idx, index_files - metadata map (not used by the script) and the BWA
+ *                           index files; the index prefix is found via its *.amb file.
+ * Outputs:
+ *   bam      - tuple of meta, the sorted BAM, and its .bai index.
+ *   versions - versions.yml recording the bwa and samtools versions used.
+ *
+ * Set task.ext.prefix to override the output file prefix.
+ */
+process BWA_MEM {
+    tag { meta.id }
+    label 'process_high'
+
+    container 'nciccbr/ccbr_ubuntu_base_20.04:v5'
+
+    input:
+        tuple val(meta), path(fastq)
+        tuple val(meta_idx), path(index_files)
+
+    output:
+        tuple val(meta), path("*.bam"), path("*.bai"), emit: bam
+        path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    # current working directory is a tmpdir when 'scratch' is set
+    TMP=tmp/
+    mkdir -p \$TMP
+    trap 'rm -rf "\$TMP"' EXIT
+
+    # the index prefix is the path of the .amb file without its extension
+    INDEX=\$(find -L ./ -name "*.amb" | sed 's/\\.amb\$//')
+    if [ -z "\$INDEX" ]; then
+        echo "ERROR: no BWA index (*.amb) found in the task directory" >&2
+        exit 1
+    fi
+
+    # bwa mem writes SAM; samtools sort detects the input format on its own
+    bwa mem \\
+      -t ${task.cpus} \\
+      -o \$TMP/align.sam \\
+      \$INDEX \\
+      ${fastq}
+
+    samtools sort \\
+      -@ ${task.cpus} \\
+      -m 2G \\
+      -T \$TMP \\
+      --write-index \\
+      -o ${prefix}.bam##idx##${prefix}.bam.bai \\
+      \$TMP/align.sam
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//')
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    // keep stub outputs named like the real ones when task.ext.prefix is set
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam ${prefix}.bam.bai versions.yml
+    """
+}
diff --git a/modules/CCBR/bwa/mem/meta.yml b/modules/CCBR/bwa/mem/meta.yml
new file mode 100644
index 0000000..9d5264d
--- /dev/null
+++ b/modules/CCBR/bwa/mem/meta.yml
@@ -0,0 +1,55 @@
+name: bwa_mem
+description: Performs fastq alignment to a fasta reference using BWA. Adapted from the nf-core bwa-mem module.
+keywords:
+  - mem
+  - bwa
+  - alignment
+  - map
+  - fastq
+  - bam
+  - sam
+tools:
+  - bwa:
+      description: |
+        BWA is a software package for mapping DNA sequences against
+        a large reference genome, such as the human genome.
+      homepage: http://bio-bwa.sourceforge.net/
+      documentation: http://bio-bwa.sourceforge.net/bwa.shtml
+      arxiv: arXiv:1303.3997
+      licence: ["GPL-3.0-or-later"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - fastq:
+      type: file
+      description: |
+        List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+        respectively.
+  - meta_idx:
+      type: map
+      description: |
+        Groovy Map accompanying the index files; not used by the alignment script
+  - index_files:
+      type: file
+      description: BWA genome index files
+      pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
+output:
+  - bam:
+      type: file
+      description: Output BAM file containing read alignments
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: Output BAI index file
+      pattern: "*.{bai}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@kelly-sovacool"
+maintainers:
+  - "@kelly-sovacool"