From 3d9dfa91186abedf077d2ba7d9630ec76c51892c Mon Sep 17 00:00:00 2001 From: zxBIB Schcolnicov Date: Thu, 10 Oct 2024 16:56:18 +0200 Subject: [PATCH 01/11] tmp --- bin/validate_samplesheet.py | 35 -------- conf/modules.config | 7 +- docs/usage.md | 2 +- modules/local/samshee/README.md | 84 ------------------- modules/local/samshee/tests/nextflow.config | 0 .../samshee/environment.yml | 2 +- modules/{local => nf-core}/samshee/main.nf | 28 +++---- modules/{local => nf-core}/samshee/meta.yml | 0 .../samshee/tests/main.nf.test | 10 +-- .../nf-core/samshee/tests/main.nf.test.snap | 72 ++++++++++++++++ nextflow_schema.json | 3 +- workflows/demultiplex.nf | 13 ++- 12 files changed, 102 insertions(+), 154 deletions(-) delete mode 100755 bin/validate_samplesheet.py delete mode 100644 modules/local/samshee/README.md delete mode 100644 modules/local/samshee/tests/nextflow.config rename modules/{local => nf-core}/samshee/environment.yml (85%) rename modules/{local => nf-core}/samshee/main.nf (56%) rename modules/{local => nf-core}/samshee/meta.yml (100%) rename modules/{local => nf-core}/samshee/tests/main.nf.test (85%) create mode 100644 modules/nf-core/samshee/tests/main.nf.test.snap diff --git a/bin/validate_samplesheet.py b/bin/validate_samplesheet.py deleted file mode 100755 index 987e3441..00000000 --- a/bin/validate_samplesheet.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 - -from samshee.samplesheetv2 import read_samplesheetv2 -from samshee.validation import illuminasamplesheetv2schema, illuminasamplesheetv2logic, validate -import json -import sys - -def validate_samplesheet(filename, custom_schema_file=None): - # Load the custom schema if provided - if custom_schema_file: - with open(custom_schema_file, 'r') as f: - custom_schema = json.load(f) - custom_validator = lambda doc: validate(doc, custom_schema) - else: - custom_validator = None - - # Prepare the list of validators - validators = [illuminasamplesheetv2schema, illuminasamplesheetv2logic] - if 
custom_validator: - validators.append(custom_validator) - # Read and validate the sample sheet - try: - sheet = read_samplesheetv2(filename, validation=validators) - print(f"Validation successful for {filename}") - except Exception as e: - print(f"Validation failed: {e}") - -if __name__ == "__main__": - if len(sys.argv) < 2 or len(sys.argv) > 3: - print("Usage: validate_samplesheet.py [custom_schema.json]") - sys.exit(1) - samplesheet_file = sys.argv[1] - schema_file = sys.argv[2] if len(sys.argv) == 3 else None - - validate_samplesheet(samplesheet_file, schema_file) diff --git a/conf/modules.config b/conf/modules.config index 6d635d07..31e22fb6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -232,9 +232,14 @@ process { mode: params.publish_dir_mode ] } -// Samshee should fail the entire pipeline immediately as it validated the illumina samplesheet to be valid before the pipeline runs. As such, it should not be running more than once & if it fails should stop the pipeline + withName: SAMSHEE { errorStrategy = "terminate" + publishDir = [ + path: { "${params.outdir}/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } diff --git a/docs/usage.md b/docs/usage.md index c3efd5b8..ed89d388 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -130,7 +130,7 @@ The trimming process in our demultiplexing pipeline has been updated to ensure c ## samshee (Samplesheet validator) -samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--validator_schema`, which accepts a JSON schema validator file. Users can specify this file to enforce additional validation rules beyond the default ones provided by the tool. To use this feature, simply provide the path to the JSON schema validator file via the `--validator_schema` parameter in the pipeline configuration. 
This enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information about the tool or how to write the schema JSON file, please refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). +samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--validator_schema`, which accepts a JSON schema validation string. Users can specify this file to enforce additional validation rules beyond the default ones provided by the tool. To use this feature, simply provide the JSON string via the `--validator_schema` parameter in the pipeline configuration. This enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). ### Updating the pipeline diff --git a/modules/local/samshee/README.md b/modules/local/samshee/README.md deleted file mode 100644 index 3e8745bc..00000000 --- a/modules/local/samshee/README.md +++ /dev/null @@ -1,84 +0,0 @@ -# Guide to Writing a `validation.json` Schema File - -## Introduction - -A JSON schema defines the structure and constraints of JSON data. This guide will help you create a `validation.json` schema file for use with Samshee to perform additional checks on Illumina® Sample Sheet v2 files. - -## JSON Schema Basics - -JSON Schema is a powerful tool for validating the structure of JSON data. It allows you to specify required fields, data types, and constraints. Here are some common components: - -- **`$schema`**: Declares the JSON Schema version being used. -- **`type`**: Specifies the data type (e.g., `object`, `array`, `string`, `number`). -- **`properties`**: Defines the properties of an object and their constraints. -- **`required`**: Lists properties that must be present in the object. 
-- **`items`**: Specifies the schema for items in an array. - -## Example Schema - -Here’s an example of a `validation.json` schema file for an Illumina® Sample Sheet: - -```json -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "Header": { - "type": "object", - "properties": { - "InvestigatorName": { - "type": "string" - }, - "ExperimentName": { - "type": "string" - } - }, - "required": ["InvestigatorName", "ExperimentName"] - }, - "Reads": { - "type": "object", - "properties": { - "Read1": { - "type": "integer", - "minimum": 1 - }, - "Read2": { - "type": "integer", - "minimum": 1 - } - }, - "required": ["Read1", "Read2"] - }, - "BCLConvert": { - "type": "object", - "properties": { - "Index": { - "type": "string", - "pattern": "^[ACGT]{8}$" // Example pattern for 8-base indices - } - } - } - }, - "required": ["Header", "Reads"] -} -``` - -### Explanation of the Example - -- **`$schema`**: Specifies the JSON Schema version (draft-07). -- **`type`**: Defines the main type as `object`. -- **`properties`**: Lists the properties of the object: -- **`Header`**: An object with required `InvestigatorName` and `ExperimentName` fields. -- **`Reads`**: An object with required `Read1` and `Read2` fields that must be integers greater than or equal to 1. -- **`BCLConvert`**: An object with an optional `Index` field that must be a string matching a pattern for 8-base indices. -- **`required`**: Lists required properties at the top level. - -### Tips for Writing JSON Schemas - -1. **Start Simple**: Begin with basic constraints and gradually add complexity. -2. **Use Online Validators**: Validate your schema using online tools to ensure it adheres to the JSON Schema specification. -3. **Refer to Schema Documentation**: Consult the [JSON Schema documentation](https://json-schema.org/) for detailed guidance. 
- -### Conclusion - -By defining a JSON schema, you can enforce specific rules and ensure that your Illumina® Sample Sheet v2 files meet your required structure and constraints. Use this guide to create and validate your `validation.json` schema files effectively. diff --git a/modules/local/samshee/tests/nextflow.config b/modules/local/samshee/tests/nextflow.config deleted file mode 100644 index e69de29b..00000000 diff --git a/modules/local/samshee/environment.yml b/modules/nf-core/samshee/environment.yml similarity index 85% rename from modules/local/samshee/environment.yml rename to modules/nf-core/samshee/environment.yml index f92e0eee..b46b46a6 100644 --- a/modules/local/samshee/environment.yml +++ b/modules/nf-core/samshee/environment.yml @@ -5,4 +5,4 @@ dependencies: - python>=3.9 - pip - pip: # FIXME https://github.com/nf-core/modules/issues/5814 - - samshee==0.1.12 + - samshee==0.2.1 diff --git a/modules/local/samshee/main.nf b/modules/nf-core/samshee/main.nf similarity index 56% rename from modules/local/samshee/main.nf rename to modules/nf-core/samshee/main.nf index acbf928c..5256c209 100644 --- a/modules/local/samshee/main.nf +++ b/modules/nf-core/samshee/main.nf @@ -4,47 +4,41 @@ process SAMSHEE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'oras://community.wave.seqera.io/library/python_pip_samshee:84a770c9853c725d' : - 'community.wave.seqera.io/library/python_pip_samshee:e8a5c47ec32efa42' }" + 'docker://community.wave.seqera.io/library/pip_samshee:733e11f3377fc2e3' : + 'community.wave.seqera.io/library/pip_samshee:733e11f3377fc2e3' }" input: tuple val(meta), path(samplesheet) - path(validator_schema) //optional + val(validator_schema) //optional output: // Module is meant to stop the pipeline if validation fails - path "versions.yml", emit: versions + tuple val(meta), path("*_formatted.csv"), emit: samplesheet + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - def args3 = task.ext.args3 ?: '' - def arg_validator_schema = validator_schema ? "${validator_schema}" : "" + def arg_validator_schema = validator_schema ? "--schema ${validator_schema}" : "" """ # Run validation command and capture output - output=\$(validate_samplesheet.py "${samplesheet}" "${arg_validator_schema}" 2>&1) - status=\$? 
- # Check if validation failed - if echo "\$output" | grep -q "Validation failed:"; then - echo "\$output" # Print output for debugging - exit 1 # Fail the process if validation failed - fi + python -m samshee $samplesheet \ + $arg_validator_schema \ + $args \ + > ${samplesheet.baseName}_formatted.csv cat <<-END_VERSIONS > versions.yml "${task.process}": samshee: \$( python -m pip show --version samshee | grep "Version" | sed -e "s/Version: //g" ) python: \$( python --version | sed -e "s/Python //g" ) END_VERSIONS - - # If no validation errors, process exits with status 0 - exit \$status """ stub: """ + touch ${samplesheet.baseName}_formatted.csv cat <<-END_VERSIONS > versions.yml "${task.process}": samshee: \$( python -m pip show --version samshee | grep "Version" | sed -e "s/Version: //g" ) diff --git a/modules/local/samshee/meta.yml b/modules/nf-core/samshee/meta.yml similarity index 100% rename from modules/local/samshee/meta.yml rename to modules/nf-core/samshee/meta.yml diff --git a/modules/local/samshee/tests/main.nf.test b/modules/nf-core/samshee/tests/main.nf.test similarity index 85% rename from modules/local/samshee/tests/main.nf.test rename to modules/nf-core/samshee/tests/main.nf.test index d76c98f4..0601d195 100644 --- a/modules/local/samshee/tests/main.nf.test +++ b/modules/nf-core/samshee/tests/main.nf.test @@ -20,9 +20,8 @@ nextflow_process { } then { - assertAll( - { assert process.success } - ) + assert process.success + assert snapshot(process.out).match() } } @@ -41,9 +40,8 @@ nextflow_process { } then { - assertAll( - { assert process.success }, - ) + assert process.success + assert snapshot(process.out).match() } } diff --git a/modules/nf-core/samshee/tests/main.nf.test.snap b/modules/nf-core/samshee/tests/main.nf.test.snap new file mode 100644 index 00000000..a6c854eb --- /dev/null +++ b/modules/nf-core/samshee/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "test samplesheet": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": 1 
+ }, + "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + ] + ], + "1": [ + "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + ], + "samplesheet": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + ] + ], + "versions": [ + "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T19:55:47.950781" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + ], + "samplesheet": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-07T19:56:05.713861" + } +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index 7ab8d009..58f8f583 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -33,8 +33,7 @@ }, "validator_schema": { "type": "string", - "format": "file-path", - "description": "Path to Illumina v2 samplesheet validator .json file" + "description": "String in json format to be passed to samshee module for samplesheet validation" }, "downstream_pipeline": { "type": "string", diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf index eae296d4..f79ce212 100644 --- a/workflows/demultiplex.nf +++ b/workflows/demultiplex.nf @@ -27,11 +27,7 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' include { UNTAR as UNTAR_FLOWCELL } from '../modules/nf-core/untar/main' include { UNTAR as UNTAR_KRAKEN_DB } from '../modules/nf-core/untar/main' include { MD5SUM } from '../modules/nf-core/md5sum/main' - -// -// MODULE: Local modules -// -include { 
SAMSHEE } from '../modules/local/samshee/main' +include { SAMSHEE } from '../modules/nf-core/samshee/main' // // FUNCTION @@ -66,8 +62,8 @@ workflow DEMULTIPLEX { ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() ch_multiqc_reports = Channel.empty() - checkqc_config = params.checkqc_config ? Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml - ch_validator_schema = params.validator_schema ? Channel.fromPath(params.validator_schema, checkIfExists: true) : [] // file validator_schema.json + checkqc_config = params.checkqc_config ? Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml + ch_validator_schema = params.validator_schema ? Channel.value(params.validator_schema) : [] // string validator_schema.json // Remove adapter from Illumina samplesheet to avoid adapter trimming in demultiplexer tools if (params.remove_adapter && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])) { @@ -99,6 +95,9 @@ workflow DEMULTIPLEX { ch_validator_schema ) ch_versions = ch_versions.mix(SAMSHEE.out.versions) + ch_samplesheet = ch_samplesheet + .join(SAMSHEE.out.samplesheet) + .map{ meta, samplesheet, flowcell, lane, samplesheet_formatted -> [ meta, samplesheet_formatted, flowcell, lane ] } } // Convenience From 54a98cb9baca2a0558ebaab0dc234d2a5a584cea Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Thu, 10 Oct 2024 18:29:37 +0000 Subject: [PATCH 02/11] Adding samshee --- CHANGELOG.md | 1 + docs/usage.md | 14 ++++++++++- modules/nf-core/samshee/main.nf | 17 ++++++++++---- modules/nf-core/samshee/tests/main.nf.test | 23 +++++++++++++++++-- nextflow.config | 9 +++++--- nextflow_schema.json | 17 ++++++++++++-- .../nf-core/utils_nfvalidation_plugin/main.nf | 10 ++++---- .../utils_nfvalidation_plugin/meta.yml | 2 +- .../tests/main.nf.test | 20 ++++++++-------- workflows/demultiplex.nf | 16 ++++++++----- 10 files changed, 94 insertions(+), 35 deletions(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index 537a33f5..d2c4dc50 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#225](https://github.com/nf-core/demultiplex/pull/225) Added test profile for multi-lane samples, updated handling of such samples and adapter trimming. - [#234](https://github.com/nf-core/demultiplex/pull/234) Added module for samplesheet validation. - [#236](https://github.com/nf-core/demultiplex/pull/236) Add samplesheet generation. +- [#TBD](TBD) Update samshee module from nf-core. ### `Changed` diff --git a/docs/usage.md b/docs/usage.md index ed89d388..e945d95b 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -130,7 +130,19 @@ The trimming process in our demultiplexing pipeline has been updated to ensure c ## samshee (Samplesheet validator) -samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--validator_schema`, which accepts a JSON schema validation string. Users can specify this file to enforce additional validation rules beyond the default ones provided by the tool. To use this feature, simply provide the JSON string via the `--validator_schema` parameter in the pipeline configuration. This enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). +samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--json_schema_validator`, which accepts a JSON schema validation string; the `--name_schema_validator`, which accepts a schema name string; and the `--file_schema_validator` which accepts a JSON schema validation file. 
Users can specify additional validation rules beyond the default ones provided by the tool using all or any of these parameters, this enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). + +:::note + +- Samshee assumes all illumina samplesheets are v2. If working with samples that have an illumina samplesheet v1 set the parameter `--v1_schema` to true. +- When indicating `--json_schema_validator` or `--name_schema_validator`, please note that it expects a JSON reference value in string format. For example: + +```bash +--json_schema_validator '{"required": ["Data"]}' +--name_schema_validator '{"$ref": "urn:samshee:illuminav2/v1"}' +``` + +::: ### Updating the pipeline diff --git a/modules/nf-core/samshee/main.nf b/modules/nf-core/samshee/main.nf index 5256c209..199a4fce 100644 --- a/modules/nf-core/samshee/main.nf +++ b/modules/nf-core/samshee/main.nf @@ -9,10 +9,11 @@ process SAMSHEE { input: tuple val(meta), path(samplesheet) - val(validator_schema) //optional + val(json_schema_validator) // optional + val(name_schema_validator) // optional + path(file_schema_validator) // optional output: - // Module is meant to stop the pipeline if validation fails tuple val(meta), path("*_formatted.csv"), emit: samplesheet path "versions.yml" , emit: versions @@ -20,12 +21,18 @@ process SAMSHEE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def arg_validator_schema = validator_schema ? "--schema ${validator_schema}" : "" + def arg_json_schema_validator = json_schema_validator ? "--schema '${json_schema_validator}'" : "" + def arg_name_schema_validator = name_schema_validator ? "--schema '${name_schema_validator}'" : "" + def arg_file_schema_validator = file_schema_validator ? 
"--schema '{\"\$ref\": \"file:${file_schema_validator}\"}'" : "" + def arg_v1_schema = params.v1_schema ? "--output-format sectioned" : "" + def args = task.ext.args ?: "" """ # Run validation command and capture output python -m samshee $samplesheet \ - $arg_validator_schema \ + $arg_json_schema_validator \ + $arg_name_schema_validator \ + $arg_file_schema_validator \ + $arg_v1_schema \ $args \ > ${samplesheet.baseName}_formatted.csv diff --git a/modules/nf-core/samshee/tests/main.nf.test b/modules/nf-core/samshee/tests/main.nf.test index 0601d195..da71b3d4 100644 --- a/modules/nf-core/samshee/tests/main.nf.test +++ b/modules/nf-core/samshee/tests/main.nf.test @@ -1,4 +1,3 @@ -// nf-core modules test cellranger/mkfastq nextflow_process { name "Test Process samshee" @@ -8,7 +7,27 @@ nextflow_process { tag "modules" - test("test samplesheet") { + test("test samplesheet_v1") { + + when { + params { + v1_schema= true + } + process { + """ + input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/miseq_35147139/miseq_35147139_samplesheet.csv", checkIfExists: true) ] + input[1] = [] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + test("test samplesheet_v2") { when { process { diff --git a/nextflow.config b/nextflow.config index 7d764432..0864aab0 100755 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,12 @@ params { input = null demultiplexer = "bclconvert" // enum string [bclconvert, bcl2fastq, bases2fastq, fqtk, sgdemux, mkfastq] + // Options: samshee, Illumina samplesheet validator + v1_schema = false // [true, false] + json_schema_validator = null // string + name_schema_validator = null // string + file_schema_validator = null // file .json + // Options: trimming trim_fastq = true // [true, false] remove_adapter = true // [true, false] @@ -32,9 +38,6 @@ params { // Options: CheckQC checkqc_config = [] // file .yaml - // Options: Illumina 
samplesheet validator - validator_schema = null // file .json - // MultiQC options multiqc_config = null multiqc_title = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 58f8f583..38abc7fa 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -31,9 +31,22 @@ "format": "path", "description": "Path to Kraken2 DB to use for screening" }, - "validator_schema": { + "json_schema_validator": { "type": "string", - "description": "String in json format to be passed to samshee module for samplesheet validation" + "description": "String in JSON format to be passed to samshee module for samplesheet validation" + }, + "name_schema_validator": { + "type": "string", + "description": "Schema name to be passed to samshee module for samplesheet validation" + }, + "file_schema_validator": { + "type": "string", + "format": "file-path", + "description": "Local JSON file to be passed to samshee module for samplesheet validation" + }, + "v1_schema": { + "type": "boolean", + "description": "Whether or not the Illumina samplesheet is v1" }, "downstream_pipeline": { "type": "string", diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf index 2585b65d..4f41c9bd 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -26,11 +26,11 @@ workflow UTILS_NFVALIDATION_PLUGIN { pre_help_text // string: string to be printed before help text and summary log post_help_text // string: string to be printed after help text and summary log validate_params // boolean: validate parameters - schema_filename // path: JSON schema file, null to use default value + file_schema_validatorname // path: JSON schema file, null to use default value main: - log.debug "Using schema file: ${schema_filename}" + log.debug "Using schema file: ${file_schema_validatorname}" // Default values for strings pre_help_text = pre_help_text ?: '' @@ -41,20 +41,20
@@ workflow UTILS_NFVALIDATION_PLUGIN { // Print help message if needed // if (print_help) { - log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: file_schema_validatorname) + post_help_text System.exit(0) } // // Print parameter summary to stdout // - log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: file_schema_validatorname) + post_help_text // // Validate parameters relative to the parameter JSON schema // if (validate_params){ - validateParameters(parameters_schema: schema_filename) + validateParameters(parameters_schema: file_schema_validatorname) } emit: diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml index 3d4a6b04..b71432d0 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -28,7 +28,7 @@ input: type: boolean description: | Validate the parameters and error if invalid. - - schema_filename: + - file_schema_validatorname: type: string description: | The filename of the schema to validate against. 
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test index 5784a33f..9cb32fdf 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -26,14 +26,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" + file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = schema_filename + input[5] = file_schema_validatorname """ } } @@ -61,14 +61,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" + file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = schema_filename + input[5] = file_schema_validatorname """ } } @@ -98,14 +98,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" + file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = schema_filename + input[5] = file_schema_validatorname """ } } @@ -137,14 +137,14 @@ nextflow_workflow { pre_help_text = "pre-help-text" post_help_text = "post-help-text" validate_params = false - schema_filename = "$moduleTestDir/nextflow_schema.json" + file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - 
input[5] = schema_filename + input[5] = file_schema_validatorname """ } } @@ -178,14 +178,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = true - schema_filename = "$moduleTestDir/nextflow_schema.json" + file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = schema_filename + input[5] = file_schema_validatorname """ } } diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf index f79ce212..fa9886cf 100644 --- a/workflows/demultiplex.nf +++ b/workflows/demultiplex.nf @@ -59,11 +59,13 @@ workflow DEMULTIPLEX { // Channel inputs - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - ch_multiqc_reports = Channel.empty() - checkqc_config = params.checkqc_config ? Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml - ch_validator_schema = params.validator_schema ? Channel.value(params.validator_schema) : [] // string validator_schema.json + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + ch_multiqc_reports = Channel.empty() + checkqc_config = params.checkqc_config ? Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml + ch_json_schema_validator = params.json_schema_validator ? Channel.value(params.json_schema_validator) : [] // string schema in json format + ch_name_schema_validator = params.name_schema_validator ? Channel.value(params.name_schema_validator) : [] // string schema name + ch_file_schema_validator = params.file_schema_validator ? 
Channel.fromPath(params.file_schema_validator, checkIfExists: true) : [] // file schema.json // Remove adapter from Illumina samplesheet to avoid adapter trimming in demultiplexer tools if (params.remove_adapter && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])) { @@ -92,7 +94,9 @@ workflow DEMULTIPLEX { if (!("samshee" in skip_tools) && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])){ SAMSHEE ( ch_samplesheet.map{ meta, samplesheet, flowcell, lane -> [meta,samplesheet] }, - ch_validator_schema + ch_json_schema_validator, + ch_name_schema_validator, + ch_file_schema_validator ) ch_versions = ch_versions.mix(SAMSHEE.out.versions) ch_samplesheet = ch_samplesheet From ff60023fa16b0a7950a39edf2607b35756ff5cee Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Thu, 10 Oct 2024 18:38:29 +0000 Subject: [PATCH 03/11] Docs and changelog update --- CHANGELOG.md | 2 +- docs/usage.md | 36 +++++++++++++++--------------------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2c4dc50..0a22d4e3 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#225](https://github.com/nf-core/demultiplex/pull/225) Added test profile for multi-lane samples, updated handling of such samples and adapter trimming. - [#234](https://github.com/nf-core/demultiplex/pull/234) Added module for samplesheet validation. - [#236](https://github.com/nf-core/demultiplex/pull/236) Add samplesheet generation. -- [#TBD](TBD) Update samshee module from nf-core. +- [#275](https://github.com/nf-core/demultiplex/pull/275) Update samshee module from nf-core. 
### `Changed` diff --git a/docs/usage.md b/docs/usage.md index e945d95b..cb9888eb 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -97,9 +97,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -:::warning -Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). -::: +> [!WARNING] +> Do not use `-c ` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). The above pipeline run specified with a params file in yaml format: @@ -132,17 +131,15 @@ The trimming process in our demultiplexing pipeline has been updated to ensure c samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--json_schema_validator`, which accepts a JSON schema validation string; the `--name_schema_validator`, which accepts a schema name string; and the `--file_schema_validator` which accepts a JSON schema validation file. Users can specify additional validation rules beyond the default ones provided by the tool using all or any of these parameters, this enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). -:::note +> [!NOTE] -- Samshee assumes all illumina samplesheets are v2. 
If working with samples that have an illumina samplesheet v1 set the parameter `--v1_schema` to true. -- When indicating `--json_schema_validator` or `--name_schema_validator`, please note that it expects a JSON reference value in string format. For example: +> - Samshee assumes all illumina samplesheets are v2. If working with samples that have an illumina samplesheet v1 set the parameter `--v1_schema` to true. +> - When indicating `--json_schema_validator` or `--name_schema_validator`, please note that it expects a JSON reference value in string format. For example: -```bash ---json_schema_validator '{"required": ["Data"]}' ---name_schema_validator '{"$ref": "urn:samshee:illuminav2/v1"}' -``` - -::: +> ```bash +> --json_schema_validator '{"required": ["Data"]}' +> --name_schema_validator '{"$ref": "urn:samshee:illuminav2/v1"}' +> ``` ### Updating the pipeline @@ -162,15 +159,13 @@ This version number will be logged in reports when you run the pipeline, so that To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -:::tip -If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -::: +> [!TIP] +> If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. ## Core Nextflow arguments -:::note -These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -::: +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` @@ -178,9 +173,8 @@ Use this parameter to choose a configuration profile. 
Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -:::info -We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -::: +> [!INFO] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). From 4d305f13801b53102d1fc1bb56b12406bec9e788 Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Thu, 10 Oct 2024 18:41:12 +0000 Subject: [PATCH 04/11] Docs --- docs/usage.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index cb9888eb..8ce08f22 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -132,10 +132,9 @@ The trimming process in our demultiplexing pipeline has been updated to ensure c samshee ensures the integrity of Illumina v2 Sample Sheets by allowing users to apply custom validation rules. The module can be used together with the parameter `--json_schema_validator`, which accepts a JSON schema validation string; the `--name_schema_validator`, which accepts a schema name string; and the `--file_schema_validator` which accepts a JSON schema validation file. 
Users can specify additional validation rules beyond the default ones provided by the tool using all or any of these parameters, this enables tailored validation of Sample Sheets to meet specific requirements or standards relevant to your sequencing workflow. For more information refer to [Samshee on GitHub](https://github.com/lit-regensburg/samshee). > [!NOTE] - -> - Samshee assumes all illumina samplesheets are v2. If working with samples that have an illumina samplesheet v1 set the parameter `--v1_schema` to true. -> - When indicating `--json_schema_validator` or `--name_schema_validator`, please note that it expects a JSON reference value in string format. For example: - +> Samshee assumes all illumina samplesheets are v2. If working with samples that have an illumina samplesheet v1 set the parameter `--v1_schema` to true. +> When indicating `--json_schema_validator` or `--name_schema_validator`, please note that it expects a JSON reference value in string format. For example: +> > ```bash > --json_schema_validator '{"required": ["Data"]}' > --name_schema_validator '{"$ref": "urn:samshee:illuminav2/v1"}' @@ -173,7 +172,7 @@ Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. -> [!INFO] +> [!NOTE] > We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. 
For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). From 6e615b96634f5cf39ea0da1562c838815a6f42fa Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Tue, 15 Oct 2024 11:10:31 +0000 Subject: [PATCH 05/11] pulled from modules --- modules/nf-core/samshee/meta.yml | 44 +++++++++++----- modules/nf-core/samshee/tests/main.nf.test | 14 +++-- .../nf-core/samshee/tests/main.nf.test.snap | 51 ++++++++++++++++--- 3 files changed, 85 insertions(+), 24 deletions(-) diff --git a/modules/nf-core/samshee/meta.yml b/modules/nf-core/samshee/meta.yml index 145ddd24..76779cf1 100644 --- a/modules/nf-core/samshee/meta.yml +++ b/modules/nf-core/samshee/meta.yml @@ -12,21 +12,41 @@ tools: documentation: https://github.com/lit-regensburg/samshee/blob/main/README.md tool_dev_url: https://github.com/lit-regensburg/samshee licence: [MIT license] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', lane:1 ] - - samplesheet: - type: file - description: "illumina v2 samplesheet" - pattern: "*.{csv}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', lane:1 ] + - samplesheet: + type: file + description: "illumina v2 samplesheet" + pattern: "*.{csv}" + - - json_schema_validator: + type: string + description: "String in JSON format used additional samplesheet validation settings" + - - name_schema_validator: + type: string + description: "Schema name used additional samplesheet validation settings" + - - file_schema_validator: + type: string + description: "JSON file used additional samplesheet validation settings" output: + - samplesheet: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', lane:1 ] + - "*_formatted.csv": + type: file + description: "illumina v2 samplesheet" - versions: - type: file - description: File containing software version - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software version + pattern: "versions.yml" authors: - "@nschcolnicov" maintainers: diff --git a/modules/nf-core/samshee/tests/main.nf.test b/modules/nf-core/samshee/tests/main.nf.test index da71b3d4..20da1d7d 100644 --- a/modules/nf-core/samshee/tests/main.nf.test +++ b/modules/nf-core/samshee/tests/main.nf.test @@ -2,21 +2,23 @@ nextflow_process { name "Test Process samshee" script "../main.nf" - config "./nextflow.config" process "SAMSHEE" - tag "modules" + tag "modules_nfcore" + tag "samshee" test("test samplesheet_v1") { when { params { - v1_schema= true + v1_schema = true } process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/miseq_35147139/miseq_35147139_samplesheet.csv", checkIfExists: true) ] - input[1] = [] + input[1] = '{"required": ["Data"]}' + input[2] = [] + input[3] = [] """ } } @@ -34,6 +36,8 @@ nextflow_process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] input[1] = [] + input[2] = '{"\$ref": "urn:samshee:illuminav2/v1"}' + input[3] = [] """ } } @@ -54,6 +58,8 @@ nextflow_process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true), [] ] input[1] = [] + input[2] = [] + input[3] = [] """ } } diff --git a/modules/nf-core/samshee/tests/main.nf.test.snap b/modules/nf-core/samshee/tests/main.nf.test.snap index a6c854eb..97900d59 100644 --- a/modules/nf-core/samshee/tests/main.nf.test.snap +++ b/modules/nf-core/samshee/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "test 
samplesheet": { + "stub": { "content": [ { "0": [ @@ -8,7 +8,7 @@ "id": "test", "lane": 1 }, - "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "1": [ @@ -20,7 +20,7 @@ "id": "test", "lane": 1 }, - "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], "versions": [ @@ -32,9 +32,9 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-07T19:55:47.950781" + "timestamp": "2024-10-10T19:18:20.176809754" }, - "stub": { + "test samplesheet_v1": { "content": [ { "0": [ @@ -43,7 +43,7 @@ "id": "test", "lane": 1 }, - "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + "miseq_35147139_samplesheet_formatted.csv:md5,2a6ee5b13242aeefdeeaa98671f1ee26" ] ], "1": [ @@ -55,7 +55,42 @@ "id": "test", "lane": 1 }, - "SampleSheet_formatted.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + "miseq_35147139_samplesheet_formatted.csv:md5,2a6ee5b13242aeefdeeaa98671f1ee26" + ] + ], + "versions": [ + "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-10T19:17:58.422458873" + }, + "test samplesheet_v2": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" + ] + ], + "1": [ + "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + ], + "samplesheet": [ + [ + { + "id": "test", + "lane": 1 + }, + "SampleSheet_formatted.csv:md5,9a1cac9e958256a17c7f43a8e15cb697" ] ], "versions": [ @@ -67,6 +102,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-07T19:56:05.713861" + "timestamp": "2024-10-10T19:18:08.736465443" } } \ No newline at end of file From 9b33143f542639ceff80e93ded8dee9ac3672523 Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Tue, 15 Oct 2024 12:14:59 +0000 Subject: [PATCH 06/11] PR comments 
on module --- conf/modules.config | 3 +++ conf/test_full.config | 2 +- modules/nf-core/samshee/main.nf | 20 +++++++++----------- modules/nf-core/samshee/meta.yml | 8 +------- workflows/demultiplex.nf | 4 ---- 5 files changed, 14 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 31e22fb6..2076d6dd 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -234,6 +234,9 @@ process { } withName: SAMSHEE { + ext.json_schema_validator = { params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "" } + ext.name_schema_validator = { params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "" } + ext.v1 = { params.v1_schema ? "--output-format sectioned" : "" } errorStrategy = "terminate" publishDir = [ path: { "${params.outdir}/" }, diff --git a/conf/test_full.config b/conf/test_full.config index 40209e9a..d591fdc4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -17,5 +17,5 @@ params { // Input data input = 'https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/samplesheet/1.3.0/samplesheet_full.csv' demultiplexer = 'bcl2fastq' - skip_tools = 'samshee' + v1_schema = true } diff --git a/modules/nf-core/samshee/main.nf b/modules/nf-core/samshee/main.nf index 199a4fce..bd66f8f5 100644 --- a/modules/nf-core/samshee/main.nf +++ b/modules/nf-core/samshee/main.nf @@ -4,14 +4,12 @@ process SAMSHEE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'docker://community.wave.seqera.io/library/pip_samshee:733e11f3377fc2e3' : - 'community.wave.seqera.io/library/pip_samshee:733e11f3377fc2e3' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/65/659cdc3068a6fbce17ccb199bb3afc8600c65940743c1a0214b3bf0eed4df1a3/data' : + 'community.wave.seqera.io/library/pip_samshee:9b655e3c18eee356' }" input: tuple val(meta), path(samplesheet) - val(json_schema_validator) // optional - val(name_schema_validator) // optional - path(file_schema_validator) // optional + path(file_schema_validator) output: tuple val(meta), path("*_formatted.csv"), emit: samplesheet @@ -21,18 +19,18 @@ process SAMSHEE { task.ext.when == null || task.ext.when script: - def arg_json_schema_validator = json_schema_validator ? "--schema '${json_schema_validator}'" : "" - def arg_name_schema_validator = name_schema_validator ? "--schema '${name_schema_validator}'" : "" - def arg_file_schema_validator = file_schema_validator ? "--schema '{\"\$ref\": \"file:${file_schema_validator}\"}'" : "" - def arg_v1_schema = params.v1_schema ? "--output-format sectioned" : "" - def args = task.ext.args ?: "" + def arg_file_schema_validator = file_schema_validator ? 
"--schema '{\"\$ref\": \"file:${file_schema_validator}\"}'" : "" + def arg_json_schema_validator = task.ext.json_schema_validator ?: "" + def arg_name_schema_validator = task.ext.name_schema_validator ?: "" + def arg_v1 = task.ext.v1 ?: "" + def args = task.ext.args ?: "" """ # Run validation command and capture output python -m samshee $samplesheet \ $arg_json_schema_validator \ $arg_name_schema_validator \ $arg_file_schema_validator \ - $arg_v1_schema \ + $arg_v1 \ $args \ > ${samplesheet.baseName}_formatted.csv diff --git a/modules/nf-core/samshee/meta.yml b/modules/nf-core/samshee/meta.yml index 76779cf1..50789dfb 100644 --- a/modules/nf-core/samshee/meta.yml +++ b/modules/nf-core/samshee/meta.yml @@ -23,15 +23,9 @@ input: type: file description: "illumina v2 samplesheet" pattern: "*.{csv}" - - - json_schema_validator: - type: string - description: "String in JSON format used additional samplesheet validation settings" - - - name_schema_validator: - type: string - description: "Schema name used additional samplesheet validation settings" - - file_schema_validator: type: string - description: "JSON file used additional samplesheet validation settings" + description: "Optional JSON file used additional samplesheet validation settings" output: - samplesheet: - meta: diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf index fa9886cf..b2f41f33 100644 --- a/workflows/demultiplex.nf +++ b/workflows/demultiplex.nf @@ -63,8 +63,6 @@ workflow DEMULTIPLEX { ch_multiqc_files = Channel.empty() ch_multiqc_reports = Channel.empty() checkqc_config = params.checkqc_config ? Channel.fromPath(params.checkqc_config, checkIfExists: true) : [] // file checkqc_config.yaml - ch_json_schema_validator = params.json_schema_validator ? Channel.value(params.json_schema_validator) : [] // string schema in json format - ch_name_schema_validator = params.name_schema_validator ? 
Channel.value(params.name_schema_validator) : [] // string schema name ch_file_schema_validator = params.file_schema_validator ? Channel.fromPath(params.file_schema_validator, checkIfExists: true) : [] // file schema.json // Remove adapter from Illumina samplesheet to avoid adapter trimming in demultiplexer tools @@ -94,8 +92,6 @@ workflow DEMULTIPLEX { if (!("samshee" in skip_tools) && (params.demultiplexer in ["bcl2fastq", "bclconvert", "mkfastq"])){ SAMSHEE ( ch_samplesheet.map{ meta, samplesheet, flowcell, lane -> [meta,samplesheet] }, - ch_json_schema_validator, - ch_name_schema_validator, ch_file_schema_validator ) ch_versions = ch_versions.mix(SAMSHEE.out.versions) From 1c87181af0b288d89c8668961c65f51bc0628936 Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Tue, 15 Oct 2024 14:19:24 +0000 Subject: [PATCH 07/11] Update samshee --- conf/modules.config | 5 +++++ modules/nf-core/samshee/main.nf | 9 +-------- modules/nf-core/samshee/tests/main.nf.test | 9 ++------- .../nf-core/samshee/tests/main.nf.test.snap | 18 +++++++++--------- modules/nf-core/samshee/tests/nextflow.config | 5 +++++ 5 files changed, 22 insertions(+), 24 deletions(-) create mode 100644 modules/nf-core/samshee/tests/nextflow.config diff --git a/conf/modules.config b/conf/modules.config index 2076d6dd..cfb3ca51 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -237,6 +237,11 @@ process { ext.json_schema_validator = { params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "" } ext.name_schema_validator = { params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "" } ext.v1 = { params.v1_schema ? "--output-format sectioned" : "" } + ext.args = [ + params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "", + params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "", + params.v1_schema ? 
"--output-format sectioned" : "", + ].join(" ").trim() errorStrategy = "terminate" publishDir = [ path: { "${params.outdir}/" }, diff --git a/modules/nf-core/samshee/main.nf b/modules/nf-core/samshee/main.nf index bd66f8f5..70bbd690 100644 --- a/modules/nf-core/samshee/main.nf +++ b/modules/nf-core/samshee/main.nf @@ -20,17 +20,10 @@ process SAMSHEE { script: def arg_file_schema_validator = file_schema_validator ? "--schema '{\"\$ref\": \"file:${file_schema_validator}\"}'" : "" - def arg_json_schema_validator = task.ext.json_schema_validator ?: "" - def arg_name_schema_validator = task.ext.name_schema_validator ?: "" - def arg_v1 = task.ext.v1 ?: "" - def args = task.ext.args ?: "" + def args = task.ext.args ?: "" """ # Run validation command and capture output python -m samshee $samplesheet \ - $arg_json_schema_validator \ - $arg_name_schema_validator \ - $arg_file_schema_validator \ - $arg_v1 \ $args \ > ${samplesheet.baseName}_formatted.csv diff --git a/modules/nf-core/samshee/tests/main.nf.test b/modules/nf-core/samshee/tests/main.nf.test index 20da1d7d..62c8994e 100644 --- a/modules/nf-core/samshee/tests/main.nf.test +++ b/modules/nf-core/samshee/tests/main.nf.test @@ -3,6 +3,7 @@ nextflow_process { name "Test Process samshee" script "../main.nf" process "SAMSHEE" + config "./nextflow.config" tag "modules" tag "modules_nfcore" tag "samshee" @@ -16,9 +17,7 @@ nextflow_process { process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/miseq_35147139/miseq_35147139_samplesheet.csv", checkIfExists: true) ] - input[1] = '{"required": ["Data"]}' - input[2] = [] - input[3] = [] + input[1] = [] """ } } @@ -36,8 +35,6 @@ nextflow_process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] input[1] = [] - input[2] = '{"\$ref": "urn:samshee:illuminav2/v1"}' - input[3] = 
[] """ } } @@ -58,8 +55,6 @@ nextflow_process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true), [] ] input[1] = [] - input[2] = [] - input[3] = [] """ } } diff --git a/modules/nf-core/samshee/tests/main.nf.test.snap b/modules/nf-core/samshee/tests/main.nf.test.snap index 97900d59..e00d8af0 100644 --- a/modules/nf-core/samshee/tests/main.nf.test.snap +++ b/modules/nf-core/samshee/tests/main.nf.test.snap @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" ], "samplesheet": [ [ @@ -24,7 +24,7 @@ ] ], "versions": [ - "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" ] } ], @@ -32,7 +32,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-10T19:18:20.176809754" + "timestamp": "2024-10-15T12:33:58.766224702" }, "test samplesheet_v1": { "content": [ @@ -47,7 +47,7 @@ ] ], "1": [ - "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" ], "samplesheet": [ [ @@ -59,7 +59,7 @@ ] ], "versions": [ - "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" ] } ], @@ -67,7 +67,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-10T19:17:58.422458873" + "timestamp": "2024-10-15T12:30:57.470221349" }, "test samplesheet_v2": { "content": [ @@ -82,7 +82,7 @@ ] ], "1": [ - "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" ], "samplesheet": [ [ @@ -94,7 +94,7 @@ ] ], "versions": [ - "versions.yml:md5,8fbb7f500f23ab9ecff6ad5b9f15f14a" + "versions.yml:md5,77af0194d386117bf52aaabdf350a976" ] } ], @@ -102,6 +102,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-10T19:18:08.736465443" + "timestamp": 
"2024-10-15T12:31:16.99133356" } } \ No newline at end of file diff --git a/modules/nf-core/samshee/tests/nextflow.config b/modules/nf-core/samshee/tests/nextflow.config new file mode 100644 index 00000000..aa0e36ad --- /dev/null +++ b/modules/nf-core/samshee/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: SAMSHEE { + ext.args = { params.v1_schema ? "--output-format sectioned" : "" } + } +} From 978e3feec97e6bf610e5b62ef860a9a4c2956c79 Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Tue, 15 Oct 2024 14:24:34 +0000 Subject: [PATCH 08/11] revert unwanted changes --- .../nf-core/utils_nfvalidation_plugin/main.nf | 10 +++++----- .../utils_nfvalidation_plugin/meta.yml | 2 +- .../tests/main.nf.test | 20 +++++++++---------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf index 4f41c9bd..2585b65d 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -26,11 +26,11 @@ workflow UTILS_NFVALIDATION_PLUGIN { pre_help_text // string: string to be printed before help text and summary log post_help_text // string: string to be printed after help text and summary log validate_params // boolean: validate parameters - file_schema_validatorname // path: JSON schema file, null to use default value + schema_filename // path: JSON schema file, null to use default value main: - log.debug "Using schema file: ${file_schema_validatorname}" + log.debug "Using schema file: ${schema_filename}" // Default values for strings pre_help_text = pre_help_text ?: '' @@ -41,20 +41,20 @@ workflow UTILS_NFVALIDATION_PLUGIN { // Print help message if needed // if (print_help) { - log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: file_schema_validatorname) + post_help_text + log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + 
post_help_text System.exit(0) } // // Print parameter summary to stdout // - log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: file_schema_validatorname) + post_help_text + log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text // // Validate parameters relative to the parameter JSON schema // if (validate_params){ - validateParameters(parameters_schema: file_schema_validatorname) + validateParameters(parameters_schema: schema_filename) } emit: diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml index b71432d0..3d4a6b04 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml @@ -28,7 +28,7 @@ input: type: boolean description: | Validate the parameters and error if invalid. - - file_schema_validatorname: + - schema_filename: type: string description: | The filename of the schema to validate against. 
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test index 9cb32fdf..5784a33f 100644 --- a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -26,14 +26,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = false - file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" + schema_filename = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = file_schema_validatorname + input[5] = schema_filename """ } } @@ -61,14 +61,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = false - file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" + schema_filename = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = file_schema_validatorname + input[5] = schema_filename """ } } @@ -98,14 +98,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = false - file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" + schema_filename = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = file_schema_validatorname + input[5] = schema_filename """ } } @@ -137,14 +137,14 @@ nextflow_workflow { pre_help_text = "pre-help-text" post_help_text = "post-help-text" validate_params = false - file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" + schema_filename = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - 
input[5] = file_schema_validatorname + input[5] = schema_filename """ } } @@ -178,14 +178,14 @@ nextflow_workflow { pre_help_text = null post_help_text = null validate_params = true - file_schema_validatorname = "$moduleTestDir/nextflow_schema.json" + schema_filename = "$moduleTestDir/nextflow_schema.json" input[0] = help input[1] = workflow_command input[2] = pre_help_text input[3] = post_help_text input[4] = validate_params - input[5] = file_schema_validatorname + input[5] = schema_filename """ } } From 24156659d894a6ce4c8f4383f48560f45e5f62d7 Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Wed, 16 Oct 2024 19:43:57 +0000 Subject: [PATCH 09/11] Installed samshee from nf-core --- modules.json | 5 +++++ modules/nf-core/samshee/environment.yml | 6 ++--- modules/nf-core/samshee/main.nf | 1 + modules/nf-core/samshee/tests/main.nf.test | 22 ++++++++++++++++--- .../nf-core/samshee/tests/main.nf.test.snap | 6 ++--- modules/nf-core/samshee/tests/nextflow.config | 6 ++++- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/modules.json b/modules.json index 91bf80bf..ae24885e 100644 --- a/modules.json +++ b/modules.json @@ -65,6 +65,11 @@ "git_sha": "b8d36829fa84b6e404364abff787e8b07f6d058c", "installed_by": ["modules"] }, + "samshee": { + "branch": "master", + "git_sha": "3c464e75051db485c1b37ab9f1ea2182fb3d3533", + "installed_by": ["modules"] + }, "seqtk/sample": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", diff --git a/modules/nf-core/samshee/environment.yml b/modules/nf-core/samshee/environment.yml index b46b46a6..35a8e2e7 100644 --- a/modules/nf-core/samshee/environment.yml +++ b/modules/nf-core/samshee/environment.yml @@ -2,7 +2,5 @@ channels: - conda-forge - bioconda dependencies: - - python>=3.9 - - pip - - pip: # FIXME https://github.com/nf-core/modules/issues/5814 - - samshee==0.2.1 + - bioconda::samshee=0.2.1 + - python=3.13.0 diff --git a/modules/nf-core/samshee/main.nf b/modules/nf-core/samshee/main.nf index 
70bbd690..6d7ba2e6 100644 --- a/modules/nf-core/samshee/main.nf +++ b/modules/nf-core/samshee/main.nf @@ -25,6 +25,7 @@ process SAMSHEE { # Run validation command and capture output python -m samshee $samplesheet \ $args \ + $arg_file_schema_validator \ > ${samplesheet.baseName}_formatted.csv cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/samshee/tests/main.nf.test b/modules/nf-core/samshee/tests/main.nf.test index 62c8994e..5bec682f 100644 --- a/modules/nf-core/samshee/tests/main.nf.test +++ b/modules/nf-core/samshee/tests/main.nf.test @@ -12,12 +12,18 @@ nextflow_process { when { params { - v1_schema = true + v1_schema = true + json_schema_validator = '{"required": ["Data"]}' + name_schema_validator = null } process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/miseq_35147139/miseq_35147139_samplesheet.csv", checkIfExists: true) ] - input[1] = [] + input[1] = file("schema.json") + new File("schema.json").text = '''{ + "\$schema": "https://json-schema.org/draft/2020-12/schema", + "required": ["Settings"] + }''' """ } } @@ -31,6 +37,11 @@ nextflow_process { test("test samplesheet_v2") { when { + params { + v1_schema = null + json_schema_validator = null + name_schema_validator = '{"$ref": "urn:samshee:illuminav2/v1"}' + } process { """ input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] @@ -51,9 +62,14 @@ nextflow_process { options "-stub" when { + params { + v1_schema = null + json_schema_validator = null + name_schema_validator = null + } process { """ - input[0] = [ [ id: 'test', lane:1 ], file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true), [] ] + input[0] = [ [ id: 'test', lane:1 ], 
file("https://raw.githubusercontent.com/nf-core/test-datasets/demultiplex/testdata/NextSeq2000/SampleSheet.csv", checkIfExists: true) ] input[1] = [] """ } diff --git a/modules/nf-core/samshee/tests/main.nf.test.snap b/modules/nf-core/samshee/tests/main.nf.test.snap index e00d8af0..b3729eba 100644 --- a/modules/nf-core/samshee/tests/main.nf.test.snap +++ b/modules/nf-core/samshee/tests/main.nf.test.snap @@ -32,7 +32,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-15T12:33:58.766224702" + "timestamp": "2024-10-16T15:25:40.722007136" }, "test samplesheet_v1": { "content": [ @@ -67,7 +67,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-15T12:30:57.470221349" + "timestamp": "2024-10-16T15:25:02.353128191" }, "test samplesheet_v2": { "content": [ @@ -102,6 +102,6 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-15T12:31:16.99133356" + "timestamp": "2024-10-16T15:25:24.540910786" } } \ No newline at end of file diff --git a/modules/nf-core/samshee/tests/nextflow.config b/modules/nf-core/samshee/tests/nextflow.config index aa0e36ad..ecf1ff66 100644 --- a/modules/nf-core/samshee/tests/nextflow.config +++ b/modules/nf-core/samshee/tests/nextflow.config @@ -1,5 +1,9 @@ process { withName: SAMSHEE { - ext.args = { params.v1_schema ? "--output-format sectioned" : "" } + ext.args = [ + params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "", + params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "", + params.v1_schema ? 
"--output-format sectioned" : "", + ].join(" ").trim() } } From 9adbcc2366640c36a0d81b3b76b49166cbf1360b Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Wed, 16 Oct 2024 20:00:40 +0000 Subject: [PATCH 10/11] Cleanup samshee modules.config --- conf/modules.config | 3 --- 1 file changed, 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cfb3ca51..b65ec2a3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -234,9 +234,6 @@ process { } withName: SAMSHEE { - ext.json_schema_validator = { params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "" } - ext.name_schema_validator = { params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "" } - ext.v1 = { params.v1_schema ? "--output-format sectioned" : "" } ext.args = [ params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "", params.name_schema_validator ? "--schema '${params.name_schema_validator}'" : "", From a12681f71af00c1c2bb945873846c4a84666f9e7 Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Mon, 21 Oct 2024 16:50:21 +0000 Subject: [PATCH 11/11] Linting fix and changelog update --- CHANGELOG.md | 2 +- nextflow_schema.json | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc8099af..12fc7b06 100755 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Changed` +- [#275](https://github.com/nf-core/demultiplex/pull/275) Update samshee module from nf-core. - [#276](https://github.com/nf-core/demultiplex/pull/276) Template update for nf-core/tools v3.0.2 ### `Fixed` @@ -43,7 +44,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [#225](https://github.com/nf-core/demultiplex/pull/225) Added test profile for multi-lane samples, updated handling of such samples and adapter trimming. 
- [#234](https://github.com/nf-core/demultiplex/pull/234) Added module for samplesheet validation. - [#236](https://github.com/nf-core/demultiplex/pull/236) Add samplesheet generation. -- [#275](https://github.com/nf-core/demultiplex/pull/275) Update samshee module from nf-core. ### `Changed` diff --git a/nextflow_schema.json b/nextflow_schema.json index ef00b65e..679f7d04 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -47,12 +47,6 @@ "v1_schema": { "type": "boolean", "description": "Whether or not illumina samplesheet is v1 " - }, - "downstream_pipeline": { - "type": "string", - "description": "Name of downstream nf-core pipeline (one of: rnaseq, atacseq, taxprofiler or default). Used to produce the input samplesheet for that pipeline.", - "default": "default", - "enum": ["rnaseq", "atacseq", "taxprofiler", "default"] } } },