Added nf-validation plugin support, software-version reporting, and a test profile

Eco-Flow · Mar 18, 2024 · 0553d38 · 0553d38
1 parent 5221025
commit 0553d38
Show file tree

Hide file tree

Showing 14 changed files with 362 additions and 37 deletions.
diff --git a/RESUME b/RESUME
diff --git a/conf/modules.config b/conf/modules.config
@@ -79,7 +79,7 @@ process {
         ext.args = [
             params.minuniquesize != null ? "--minuniquesize ${params.minuniquesize}" : "",
             params.derep_strand != null ? "--strand ${params.derep_strand}" : "",
-            params.sizeout != null ? "--sizeout" : "",
+            params.sizeout == true ? "--sizeout" : "",
             params.fasta_width != null ? "--fasta_width ${params.fasta_width}" : ""
         ].join(' ').trim()
         publishDir = [
@@ -104,13 +104,24 @@ process {
     withName: R_PROCESSING {
+        // Publish only the files matching *.classified.tsv into params.outdir/r-processing,
+        // using the publish mode given by params.publish_dir_mode.
         publishDir = [
             path: { "${params.outdir}/r-processing" },
-            mode: params.publish_dir_mode
+            mode: params.publish_dir_mode,
+            pattern: "*.classified.tsv"
         ]
     }
+
     withName: PROCESSING {
+        // Publish only the files matching *.classified.tsv into params.outdir/processing,
+        // using the publish mode given by params.publish_dir_mode.
         publishDir = [
             path: { "${params.outdir}/processing" },
-            mode: params.publish_dir_mode
+            mode: params.publish_dir_mode,
+            pattern: "*.classified.tsv"
+        ]
+    }
+
+    withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' {
+        // Publish the merged versions report into params.outdir/pipeline_info.
+        // Of the three files the process declares (software_versions.yml,
+        // software_versions_mqc.yml, versions.yml) only software_versions.yml
+        // matches the '*_versions.yml' glob, so only that one is published.
+        publishDir = [
+            path: { "${params.outdir}/pipeline_info" },
+            mode: params.publish_dir_mode,
+            pattern: '*_versions.yml'
         ]
     }
 }
diff --git a/conf/test.config b/conf/test.config
@@ -0,0 +1,20 @@
+params {
+  // Test profile parameters.
+
+  // Inputs: sample sheet shipped with the project and a reference database hosted on S3
+  input            = "${projectDir}/data/input-s3.csv"
+  database         = 's3://pollen-metabarcoding-test-data/data/viridiplantae_all_2014.sintax.fa'
+
+  // Primer sequences and handling of untrimmed reads
+  // NOTE(review): presumably consumed by the CUTADAPT step - confirm against its module config
+  FW_primer        = 'ATGCGATACTTGGTGTGAAT'
+  RV_primer        = 'GCATATCAATAAGCGGAGGA'
+  retain_untrimmed = true
+
+  // Read filtering thresholds
+  // NOTE(review): presumably consumed by the VSEARCH_FASTQ_FILTER step - confirm
+  fastq_maxee      = '0.5'
+  fastq_minlen     = 250
+  fastq_maxns      = 0
+
+  // Options turned into --fasta_width / --minuniquesize / --strand / --sizeout
+  // by the ext.args list in conf/modules.config
+  fasta_width      = 0
+  minuniquesize    = 2
+  derep_strand     = 'plus'
+  sizeout          = true
+
+  // Classification settings
+  // NOTE(review): presumably consumed by the VSEARCH_SINTAX step - confirm
+  sintax_strand    = 'both'
+  sintax_cutoff    = '0.95'
+
+  // Max resources for test input data can be reduced
+  max_memory       = '24.GB'
+  max_cpus         = 16
+  max_time         = '24.h'
+}
diff --git a/main.nf b/main.nf
@@ -1,5 +1,22 @@
 #!/usr/bin/env nextflow
 
+// Print the pipeline banner (name, version, authors, copyright) to the log.
+// stripIndent() removes the leading indentation shared by the banner lines.
+// NOTE(review): the version shown in the banner is hard-coded in this string;
+// keep it in sync with the released pipeline version.
+log.info """\
+ =========================================
+
+ nf-pollen-metabarcoding (v1.0.0)
+
+ -----------------------------------------
+
+ Authors:
+   - Simon Murray <[email protected]>
+   - Chris Wyatt <[email protected]>
+
+ -----------------------------------------
+
+ Copyright (c) 2024
+
+ =========================================""".stripIndent()
+
 def helpMessage() {
     log.info"""
     You have asked for the help message.
@@ -21,32 +38,67 @@ include { VSEARCH_DEREP_FULL_LENGTH } from './modules/local/vsearch_derep.nf'
 include { VSEARCH_SINTAX } from './modules/nf-core/vsearch/sintax/main'
 include { R_PROCESSING } from './modules/local/r_processing.nf'
 include { PROCESSING } from './modules/local/processing.nf'
+include { validateParameters; paramsHelp; paramsSummaryLog; fromSamplesheet } from 'plugin/nf-validation'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from './modules/nf-core/custom/dumpsoftwareversions/main'
 
 workflow {
+  //Pipeline order: CUTADAPT -> PEAR -> VSEARCH_FASTQ_FILTER -> VSEARCH_DEREP_FULL_LENGTH
+  //-> VSEARCH_SINTAX -> R_PROCESSING or PROCESSING, then CUSTOM_DUMPSOFTWAREVERSIONS
+
+  //Print the help message and stop when params.help is set
   if (params.help) {
     helpMessage()
     exit 0
   }
+
   //Ensuring mandatory parameters are provided
   ch_sample_list = params.input != null ? Channel.fromPath(params.input) : errorMessage()
   ch_database = params.database != null ? Channel.fromPath(params.database) : errorMessage()
+
+  // Validate input parameters
+  validateParameters()
+
+  // Print summary of supplied parameters
+  log.info paramsSummaryLog(workflow)
+
+  //Make a channel for version outputs:
+  ch_versions = Channel.empty()
+
   //Input to cutadapt depends on whether a single-end fastq or a set of paired-end fastqs are provided
+  //Each line of the sample sheet is split on commas: field 1 is the sample id,
+  //field 2 (plus field 3 when paired-end) the read file(s)
   if (params.single_end == true) {
      ch_sample_list | flatMap{ it.readLines() } | map{ csv -> [ [ "id":csv.split(",")[0], "single_end": true ], [ csv.split(",")[1] ] ] } | CUTADAPT
   }
   else {
      ch_sample_list | flatMap{ it.readLines() } | map{ csv -> [ [ "id":csv.split(",")[0], "single_end": false ], [ csv.split(",")[1], csv.split(",")[2] ] ] } | CUTADAPT
   }
+  //Only the first versions file emitted by each process is mixed into ch_versions
+  ch_versions = ch_versions.mix(CUTADAPT.out.versions.first())
+
   CUTADAPT.out.reads | PEAR
+  ch_versions = ch_versions.mix(PEAR.out.versions.first())
+
   PEAR.out.assembled | VSEARCH_FASTQ_FILTER
+  ch_versions = ch_versions.mix(VSEARCH_FASTQ_FILTER.out.versions.first())
+
   VSEARCH_FASTQ_FILTER.out.fasta | VSEARCH_DEREP_FULL_LENGTH
+  ch_versions = ch_versions.mix(VSEARCH_DEREP_FULL_LENGTH.out.versions.first())
+
   //Need to ensure the database is provided to each fasta file
+  //multiMap splits every combined tuple into `fa` (its first two elements) and `db` (its third element)
-  VSEARCH_DEREP_FULL_LENGTH.out.fasta | combine(ch_database) | multiMap { it -> fa: [it[0], it[1]]; db: it[2] } | set { ch_sintax } 
+  VSEARCH_DEREP_FULL_LENGTH.out.fasta | combine(ch_database) | multiMap { it -> fa: [it[0], it[1]]; db: it[2] } | set { ch_sintax }
+
   VSEARCH_SINTAX(ch_sintax.fa, ch_sintax.db)
+  ch_versions = ch_versions.mix(VSEARCH_SINTAX.out.versions.first())
+
+  //The original scripts used R to wrangle the sintax output; the same can be done with a single line of bash, so the R script is an optional module
   if (params.r_processing == true) {
-    R_PROCESSING(VSEARCH_SINTAX.out.tsv)  
+    R_PROCESSING(VSEARCH_SINTAX.out.tsv)
+    ch_versions = ch_versions.mix(R_PROCESSING.out.versions.first())
   }
   else {
+    //NOTE(review): no versions are mixed in from PROCESSING on this branch - confirm whether it emits a versions output
     PROCESSING(VSEARCH_SINTAX.out.tsv)
   }
+
+  //Gather every collected versions file into a single collated_versions.yml and hand it to the versions report process
+  CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.collectFile(name: 'collated_versions.yml')
+  )
+}
+
+workflow.onComplete {
+    // Report the final status of the run: point at the results directory on
+    // success, otherwise print a short failure notice.
+    if (workflow.success) {
+        println("\nDone! Check results in $params.outdir \n")
+    } else {
+        println("Hmmm .. something went wrong\n")
+    }
 }
diff --git a/modules.json b/modules.json
@@ -5,6 +5,11 @@
     "https://github.com/nf-core/modules.git": {
       "modules": {
         "nf-core": {
+          "custom/dumpsoftwareversions": {
+            "branch": "master",
+            "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1",
+            "installed_by": ["modules"]
+          },
           "cutadapt": {
             "branch": "master",
             "git_sha": "6618151ed69274863dc6fe6d2920afa90abaca1f",

diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -0,0 +1,7 @@
+name: custom_dumpsoftwareversions
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::multiqc=1.20
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -0,0 +1,24 @@
+process CUSTOM_DUMPSOFTWAREVERSIONS {
+    // Takes one versions file and produces three YAML files: the merged
+    // software_versions.yml, a MultiQC-oriented software_versions_mqc.yml,
+    // and this module's own versions.yml (see the output block).
+    label 'process_single'
+
+    // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
+    conda "${moduleDir}/environment.yml"
+    // With the singularity engine (and task.ext.singularity_pull_docker_container unset)
+    // the Galaxy depot image is used; in every other case the biocontainers image.
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' :
+        'biocontainers/multiqc:1.20--pyhdfd78af_0' }"
+
+    input:
+    path versions
+
+    output:
+    path "software_versions.yml"    , emit: yml
+    path "software_versions_mqc.yml", emit: mqc_yml
+    path "versions.yml"             , emit: versions
+
+    // Run unless task.ext.when is set to a false value
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // NOTE(review): `args` is not referenced by the dumpsoftwareversions.py template
+    def args = task.ext.args ?: ''
+    // The script body is rendered from templates/dumpsoftwareversions.py
+    template 'dumpsoftwareversions.py'
+}
diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
@@ -0,0 +1,37 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: custom_dumpsoftwareversions
+description: Custom module used to dump software versions within the nf-core pipeline template
+keywords:
+  - custom
+  - dump
+  - version
+tools:
+  - custom:
+      description: Custom module used to dump software versions within the nf-core pipeline template
+      homepage: https://github.com/nf-core/tools
+      documentation: https://github.com/nf-core/tools
+      licence: ["MIT"]
+input:
+  - versions:
+      type: file
+      description: YML file containing software versions
+      pattern: "*.yml"
+output:
+  - yml:
+      type: file
+      description: Standard YML file containing software versions
+      pattern: "software_versions.yml"
+  - mqc_yml:
+      type: file
+      description: MultiQC custom content YML file containing software versions
+      pattern: "software_versions_mqc.yml"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@grst"
+maintainers:
+  - "@drpatelh"
+  - "@grst"
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
+import yaml
+import platform
+from textwrap import dedent
+
+
+def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC.
+
+    Args:
+        versions: mapping of process/module name to a mapping of tool name
+            to version.
+
+    Returns:
+        A single string holding the HTML fragment: a style block and table
+        header, then one tbody per process with one row per tool, then the
+        closing table tag. Processes and tools are emitted in sorted order
+        and the process name is shown only on the first row of its group.
+    """
+    # NOTE(review): the doubled backslashes in the string literals below are
+    # presumably there because this file is rendered as a Nextflow template,
+    # which consumes one level of escaping - confirm before changing them.
+    html = [
+        dedent(
+            """\\
+            <style>
+            #nf-core-versions tbody:nth-child(even) {
+                background-color: #f2f2f2;
+            }
+            </style>
+            <table class="table" style="width:100%" id="nf-core-versions">
+                <thead>
+                    <tr>
+                        <th> Process Name </th>
+                        <th> Software </th>
+                        <th> Version  </th>
+                    </tr>
+                </thead>
+            """
+        )
+    ]
+    # One tbody per process so the nth-child(even) rule stripes whole groups.
+    for process, tmp_versions in sorted(versions.items()):
+        html.append("<tbody>")
+        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+            # The process name cell is filled only for the first tool (i == 0).
+            html.append(
+                dedent(
+                    f"""\\
+                    <tr>
+                        <td><samp>{process if (i == 0) else ''}</samp></td>
+                        <td><samp>{tool}</samp></td>
+                        <td><samp>{version}</samp></td>
+                    </tr>
+                    """
+                )
+            )
+        html.append("</tbody>")
+    html.append("</table>")
+    return "\\n".join(html)
+
+
+def main():
+    """Load all version files and generate merged output.
+
+    Reads the versions file supplied as the process input, adds this
+    module's own python/yaml versions, groups the entries by module name,
+    appends a "Workflow" entry, and writes three files into the working
+    directory: software_versions.yml, software_versions_mqc.yml and
+    versions.yml.
+
+    Raises:
+        AssertionError: if two processes that map to the same module name
+            report different versions.
+    """
+    # Versions of the tools used by this module itself, keyed by the
+    # process name that Nextflow substitutes into the template.
+    versions_this_module = {}
+    versions_this_module["${task.process}"] = {
+        "python": platform.python_version(),
+        "yaml": yaml.__version__,
+    }
+
+    # The input path is substituted by Nextflow when the template is rendered.
+    # BaseLoader is used so the YAML is parsed without constructing arbitrary
+    # objects; the dict union lets this module's entry win on a key clash.
+    with open("$versions") as f:
+        versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module
+
+    # aggregate versions by the module name (derived from fully-qualified process name)
+    versions_by_module = {}
+    for process, process_versions in versions_by_process.items():
+        # Keep only the last colon-separated component of the process name.
+        module = process.split(":")[-1]
+        try:
+            # Module already seen: its versions must match the stored ones.
+            if versions_by_module[module] != process_versions:
+                raise AssertionError(
+                    "We assume that software versions are the same between all modules. "
+                    "If you see this error-message it means you discovered an edge-case "
+                    "and should open an issue in nf-core/tools. "
+                )
+        except KeyError:
+            # First time this module is seen: record its versions.
+            versions_by_module[module] = process_versions
+
+    # Workflow-level entry; the values are filled in by Nextflow at render time.
+    versions_by_module["Workflow"] = {
+        "Nextflow": "$workflow.nextflow.version",
+        "$workflow.manifest.name": "$workflow.manifest.version",
+    }
+
+    # MultiQC section description wrapping the HTML table of versions.
+    versions_mqc = {
+        "id": "software_versions",
+        "section_name": "${workflow.manifest.name} Software Versions",
+        "section_href": "https://github.com/${workflow.manifest.name}",
+        "plot_type": "html",
+        "description": "are collected at run time from the software output.",
+        "data": _make_versions_html(versions_by_module),
+    }
+
+    # Write the three files declared in the process output block.
+    with open("software_versions.yml", "w") as f:
+        yaml.dump(versions_by_module, f, default_flow_style=False)
+    with open("software_versions_mqc.yml", "w") as f:
+        yaml.dump(versions_mqc, f, default_flow_style=False)
+
+    with open("versions.yml", "w") as f:
+        yaml.dump(versions_this_module, f, default_flow_style=False)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test
@@ -0,0 +1,43 @@
+nextflow_process {
+    // nf-test specification for the CUSTOM_DUMPSOFTWAREVERSIONS process defined in ../main.nf
+
+    name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS"
+    script "../main.nf"
+    process "CUSTOM_DUMPSOFTWAREVERSIONS"
+    tag "modules"
+    tag "modules_nfcore"
+    tag "custom"
+    tag "dumpsoftwareversions"
+    tag "custom/dumpsoftwareversions"
+
+    // Feeds two small version snippets (TOOL1, TOOL2), merged into one file
+    // with collectFile(), as the single process input.
+    test("Should run without failures") {
+        when {
+            process {
+                """
+                def tool1_version = '''
+                TOOL1:
+                    tool1: 0.11.9
+                '''.stripIndent()
+
+                def tool2_version = '''
+                TOOL2:
+                    tool2: 1.9
+                '''.stripIndent()
+
+                input[0] = Channel.of(tool1_version, tool2_version).collectFile()
+                """
+            }
+        }
+
+        then {
+            // The process must succeed, and the snapshot must match for: the
+            // versions output, lines 0..10 of the MultiQC YAML and lines 0..7
+            // of the merged YAML.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(
+                    process.out.versions,
+                    file(process.out.mqc_yml[0]).readLines()[0..10],
+                    file(process.out.yml[0]).readLines()[0..7]
+                    ).match()
+                }
+            )
+        }
+    }
+}