Skip to content

Commit

Permalink
added nf-plugin stuff and a test profile
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonDMurray committed Mar 18, 2024
1 parent 5221025 commit 0553d38
Show file tree
Hide file tree
Showing 14 changed files with 362 additions and 37 deletions.
9 changes: 0 additions & 9 deletions RESUME

This file was deleted.

17 changes: 14 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ process {
ext.args = [
params.minuniquesize != null ? "--minuniquesize ${params.minuniquesize}" : "",
params.derep_strand != null ? "--strand ${params.derep_strand}" : "",
params.sizeout != null ? "--sizeout" : "",
params.sizeout == true ? "--sizeout" : "",
params.fasta_width != null ? "--fasta_width ${params.fasta_width}" : ""
].join(' ').trim()
publishDir = [
Expand All @@ -104,13 +104,24 @@ process {
// Publishing rules for the optional R-based post-processing step.
withName: R_PROCESSING {
    publishDir = [
        // Results land in <outdir>/r-processing.
        path: { "${params.outdir}/r-processing" },
        // A single `mode` entry: the stale, comma-less duplicate was removed
        // because it made the map literal invalid.
        mode: params.publish_dir_mode,
        // Only files matching this glob are published.
        pattern: "*.classified.tsv"
    ]
}

// Publishing rules for the default (non-R) post-processing step.
withName: PROCESSING {
    publishDir = [
        // Results land in <outdir>/processing.
        path: { "${params.outdir}/processing" },
        // A single `mode` entry: the stale, comma-less duplicate was removed
        // because it made the map literal invalid.
        mode: params.publish_dir_mode,
        // Only files matching this glob are published.
        pattern: "*.classified.tsv"
    ]
}

// Publishing rules for the software-version report.
// The glob below requires a "_versions.yml" suffix, so of the module's declared
// outputs (software_versions.yml, software_versions_mqc.yml, versions.yml) only
// software_versions.yml should be published.
withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' {
publishDir = [
// Written to <outdir>/pipeline_info.
path: { "${params.outdir}/pipeline_info" },
mode: params.publish_dir_mode,
pattern: '*_versions.yml'
]
}
}
20 changes: 20 additions & 0 deletions conf/test.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Parameter set for the test profile.
params {
// Sample sheet shipped with the pipeline, resolved relative to projectDir.
input = "${projectDir}/data/input-s3.csv"
// Reference database read directly from an S3 bucket.
database = "s3://pollen-metabarcoding-test-data/data/viridiplantae_all_2014.sintax.fa"
// Forward / reverse primer sequences.
// NOTE(review): presumably consumed by the cutadapt step, together with retain_untrimmed — confirm.
FW_primer = "ATGCGATACTTGGTGTGAAT"
RV_primer = "GCATATCAATAAGCGGAGGA"
retain_untrimmed = true
// NOTE(review): fastq_* values look like read-filtering thresholds — confirm where they are used.
// fastq_maxee is given as a string while the other two are integers.
fastq_maxee = "0.5"
fastq_minlen = 250
fastq_maxns = 0
// The next four values are turned into --fasta_width, --minuniquesize, --strand
// and --sizeout arguments in conf/modules.config; --sizeout is only added when
// sizeout is exactly true.
fasta_width = 0
minuniquesize = 2
derep_strand = "plus"
sizeout = true
// NOTE(review): sintax_* presumably configure the sintax classification step — confirm.
sintax_strand = "both"
sintax_cutoff = "0.95"
// Resource ceilings for the test data; these can be lowered for small inputs.
max_memory = '24.GB'
max_cpus = 16
max_time = '24.h'
}
56 changes: 54 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
#!/usr/bin/env nextflow

// Log a start-up banner with the pipeline name, version and authors.
// The leading backslash suppresses the first newline of the multi-line string,
// and stripIndent() is applied to the text before it is logged.
log.info """\
=========================================
nf-pollen-metabarcoding (v1.0.0)
-----------------------------------------
Authors:
- Simon Murray <[email protected]>
- Chris Wyatt <[email protected]>
-----------------------------------------
Copyright (c) 2024
=========================================""".stripIndent()

def helpMessage() {
log.info"""
You have asked for the help message.
Expand All @@ -21,32 +38,67 @@ include { VSEARCH_DEREP_FULL_LENGTH } from './modules/local/vsearch_derep.nf'
include { VSEARCH_SINTAX } from './modules/nf-core/vsearch/sintax/main'
include { R_PROCESSING } from './modules/local/r_processing.nf'
include { PROCESSING } from './modules/local/processing.nf'
include { validateParameters; paramsHelp; paramsSummaryLog; fromSamplesheet } from 'plugin/nf-validation'
include { CUSTOM_DUMPSOFTWAREVERSIONS } from './modules/nf-core/custom/dumpsoftwareversions/main'

// Entry workflow: trim -> merge -> filter -> dereplicate -> classify -> post-process,
// collecting each step's versions output for the final software-version report.
workflow {

    // Print the usage text and stop when --help is supplied.
    if (params.help) {
        helpMessage()
        exit 0
    }

    // Ensuring mandatory parameters are provided; errorMessage() handles a missing value.
    ch_sample_list = params.input != null ? Channel.fromPath(params.input) : errorMessage()
    ch_database = params.database != null ? Channel.fromPath(params.database) : errorMessage()

    // Validate input parameters
    validateParameters()

    // Print summary of supplied parameters
    log.info paramsSummaryLog(workflow)

    // Make a channel for version outputs
    ch_versions = Channel.empty()

    // Input to cutadapt depends on whether a single-end fastq or a set of
    // paired-end fastqs are provided. Each sample-sheet line is split once:
    // column 0 is the sample id, column 1 the first read file and (paired-end
    // only) column 2 the second read file.
    ch_reads = ch_sample_list
        .flatMap { it.readLines() }
        .map { csv ->
            def fields = csv.split(",")
            def single = params.single_end == true
            def reads = single ? [ fields[1] ] : [ fields[1], fields[2] ]
            [ [ "id": fields[0], "single_end": single ], reads ]
        }

    CUTADAPT(ch_reads)
    ch_versions = ch_versions.mix(CUTADAPT.out.versions.first())

    PEAR(CUTADAPT.out.reads)
    ch_versions = ch_versions.mix(PEAR.out.versions.first())

    VSEARCH_FASTQ_FILTER(PEAR.out.assembled)
    ch_versions = ch_versions.mix(VSEARCH_FASTQ_FILTER.out.versions.first())

    VSEARCH_DEREP_FULL_LENGTH(VSEARCH_FASTQ_FILTER.out.fasta)
    ch_versions = ch_versions.mix(VSEARCH_DEREP_FULL_LENGTH.out.versions.first())

    // Need to ensure the database is provided to each fasta file: combine pairs
    // every dereplicated fasta with the database, then multiMap splits the
    // tuple back into the two inputs of the sintax process. This pipeline is
    // declared once only (the repeated copy was removed).
    VSEARCH_DEREP_FULL_LENGTH.out.fasta
        .combine(ch_database)
        .multiMap { it ->
            fa: [it[0], it[1]]
            db: it[2]
        }
        .set { ch_sintax }

    VSEARCH_SINTAX(ch_sintax.fa, ch_sintax.db)
    ch_versions = ch_versions.mix(VSEARCH_SINTAX.out.versions.first())

    // Original scripts used R for wrangling the sintax output; the same can be
    // done with a single line of bash, so the R script is an optional module.
    if (params.r_processing == true) {
        // Invoked exactly once: a process cannot be called twice in the same
        // workflow without an alias.
        R_PROCESSING(VSEARCH_SINTAX.out.tsv)
        ch_versions = ch_versions.mix(R_PROCESSING.out.versions.first())
    }
    else {
        // NOTE(review): no versions output is collected for PROCESSING —
        // confirm whether that module emits one.
        PROCESSING(VSEARCH_SINTAX.out.tsv)
    }

    // Concatenate every collected versions file into one input for the report.
    CUSTOM_DUMPSOFTWAREVERSIONS (
        ch_versions.collectFile(name: 'collated_versions.yml')
    )
}

// Completion handler: print a one-line status message for the finished run.
workflow.onComplete {
    if (workflow.success) {
        println("\nDone! Check results in $params.outdir \n")
    }
    else {
        println("Hmmm .. something went wrong\n")
    }
}
5 changes: 5 additions & 0 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
"https://github.com/nf-core/modules.git": {
"modules": {
"nf-core": {
"custom/dumpsoftwareversions": {
"branch": "master",
"git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1",
"installed_by": ["modules"]
},
"cutadapt": {
"branch": "master",
"git_sha": "6618151ed69274863dc6fe6d2920afa90abaca1f",
Expand Down
7 changes: 7 additions & 0 deletions modules/nf-core/custom/dumpsoftwareversions/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: custom_dumpsoftwareversions
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- bioconda::multiqc=1.20
24 changes: 24 additions & 0 deletions modules/nf-core/custom/dumpsoftwareversions/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Merges the collected versions file into the final software-version reports
// by running the `dumpsoftwareversions.py` template.
process CUSTOM_DUMPSOFTWAREVERSIONS {
label 'process_single'

// Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
conda "${moduleDir}/environment.yml"
// Use the Galaxy depot Singularity image when running under Singularity (unless
// ext.singularity_pull_docker_container is set); otherwise the biocontainers image.
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' :
'biocontainers/multiqc:1.20--pyhdfd78af_0' }"

input:
// Single file holding the versions collected from the other processes.
path versions

output:
path "software_versions.yml" , emit: yml
path "software_versions_mqc.yml", emit: mqc_yml
path "versions.yml" , emit: versions

when:
// Runs unless task.ext.when is set to a falsy value.
task.ext.when == null || task.ext.when

script:
// NOTE(review): `args` is not referenced anywhere else in this block.
def args = task.ext.args ?: ''
template 'dumpsoftwareversions.py'
}
37 changes: 37 additions & 0 deletions modules/nf-core/custom/dumpsoftwareversions/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
name: custom_dumpsoftwareversions
description: Custom module used to dump software versions within the nf-core pipeline template
keywords:
- custom
- dump
- version
tools:
- custom:
description: Custom module used to dump software versions within the nf-core pipeline template
homepage: https://github.com/nf-core/tools
documentation: https://github.com/nf-core/tools
licence: ["MIT"]
input:
- versions:
type: file
description: YML file containing software versions
pattern: "*.yml"
output:
- yml:
type: file
description: Standard YML file containing software versions
pattern: "software_versions.yml"
- mqc_yml:
type: file
description: MultiQC custom content YML file containing software versions
pattern: "software_versions_mqc.yml"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@drpatelh"
- "@grst"
maintainers:
- "@drpatelh"
- "@grst"
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python


"""Provide functions to merge multiple versions.yml files."""


import yaml
import platform
from textwrap import dedent


def _make_versions_html(versions):
"""Generate a tabular HTML output of all versions for MultiQC.

Args:
versions: Mapping of a process/module name to a mapping of tool name
to version. Both levels are iterated in sorted key order.

Returns:
str: The HTML fragments (style block, table header, one <tbody> per
process, closing tag) joined into a single string.
"""
# NOTE(review): the doubled backslashes below look like escapes for a
# Nextflow template (this script also contains `$versions`-style
# placeholders) — confirm before editing these literals.
html = [
dedent(
"""\\
<style>
#nf-core-versions tbody:nth-child(even) {
background-color: #f2f2f2;
}
</style>
<table class="table" style="width:100%" id="nf-core-versions">
<thead>
<tr>
<th> Process Name </th>
<th> Software </th>
<th> Version </th>
</tr>
</thead>
"""
)
]
# One <tbody> per process; the process name is printed only on its first row.
for process, tmp_versions in sorted(versions.items()):
html.append("<tbody>")
for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
html.append(
dedent(
f"""\\
<tr>
<td><samp>{process if (i == 0) else ''}</samp></td>
<td><samp>{tool}</samp></td>
<td><samp>{version}</samp></td>
</tr>
"""
)
)
html.append("</tbody>")
html.append("</table>")
return "\\n".join(html)


def main():
"""Load all version files and generate merged output.

Reads the versions file, adds this module's own versions, groups the
entries by module name, appends a "Workflow" entry and writes three files:
software_versions.yml, software_versions_mqc.yml and versions.yml.

Raises:
AssertionError: If two processes that map to the same module name
report different versions.
"""
# Versions of the tools used by this script itself.
versions_this_module = {}
versions_this_module["${task.process}"] = {
"python": platform.python_version(),
"yaml": yaml.__version__,
}

# The dict union operator `|` needs Python 3.9+; on key clashes the right-hand
# operand (this module's versions) wins.
# NOTE(review): if the input file were empty, yaml.load would presumably return
# None and the union would fail — confirm the input can never be empty.
with open("$versions") as f:
versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module

# aggregate versions by the module name (derived from fully-qualified process name)
versions_by_module = {}
for process, process_versions in versions_by_process.items():
# Keep only the last component of a colon-separated process name.
module = process.split(":")[-1]
try:
if versions_by_module[module] != process_versions:
raise AssertionError(
"We assume that software versions are the same between all modules. "
"If you see this error-message it means you discovered an edge-case "
"and should open an issue in nf-core/tools. "
)
except KeyError:
# First time this module is seen: record its versions.
versions_by_module[module] = process_versions

versions_by_module["Workflow"] = {
"Nextflow": "$workflow.nextflow.version",
"$workflow.manifest.name": "$workflow.manifest.version",
}

# MultiQC custom-content section wrapping the HTML version table.
versions_mqc = {
"id": "software_versions",
"section_name": "${workflow.manifest.name} Software Versions",
"section_href": "https://github.com/${workflow.manifest.name}",
"plot_type": "html",
"description": "are collected at run time from the software output.",
"data": _make_versions_html(versions_by_module),
}

with open("software_versions.yml", "w") as f:
yaml.dump(versions_by_module, f, default_flow_style=False)
with open("software_versions_mqc.yml", "w") as f:
yaml.dump(versions_mqc, f, default_flow_style=False)

# Versions of this module only.
with open("versions.yml", "w") as f:
yaml.dump(versions_this_module, f, default_flow_style=False)


if __name__ == "__main__":
main()
43 changes: 43 additions & 0 deletions modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// nf-test specification for the CUSTOM_DUMPSOFTWAREVERSIONS process.
nextflow_process {

name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
script "../main.nf"
process "CUSTOM_DUMPSOFTWAREVERSIONS"
tag "modules"
tag "modules_nfcore"
tag "custom"
tag "dumpsoftwareversions"
tag "custom/dumpsoftwareversions"

test("Should run without failures") {
when {
// Builds two small YAML version snippets and feeds them to the process
// as one collected file.
process {
"""
def tool1_version = '''
TOOL1:
tool1: 0.11.9
'''.stripIndent()

def tool2_version = '''
TOOL2:
tool2: 1.9
'''.stripIndent()

input[0] = Channel.of(tool1_version, tool2_version).collectFile()
"""
}
}

then {
// The process must succeed, and the versions output plus the first 11 lines
// of the MultiQC YAML and the first 8 lines of the plain YAML must match
// the stored snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(
process.out.versions,
file(process.out.mqc_yml[0]).readLines()[0..10],
file(process.out.yml[0]).readLines()[0..7]
).match()
}
)
}
}
}
Loading

0 comments on commit 0553d38

Please sign in to comment.