From c53abeedd2e4831f43dbb5e9aa2f5255dfbb516a Mon Sep 17 00:00:00 2001 From: nschcolnicov Date: Wed, 30 Oct 2024 16:05:51 +0000 Subject: [PATCH] Fixes in samplesheet generator script and snap updates --- conf/modules.config | 9 +++ modules/local/fastq_to_samplesheet/main.nf | 78 +++++++++++++++------- tests/bases2fastq.nf.test | 5 +- tests/bases2fastq.nf.test.snap | 34 +++++++++- tests/bcl2fastq.nf.test | 5 +- tests/bcl2fastq.nf.test.snap | 34 +++++++++- tests/bclconvert.nf.test | 5 +- tests/bclconvert.nf.test.snap | 34 +++++++++- tests/bclconvert_mini.nf.test | 5 +- tests/bclconvert_mini.nf.test.snap | 41 +++++++++--- tests/fqtk.nf.test | 5 +- tests/fqtk.nf.test.snap | 60 +++++++++-------- tests/kraken.nf.test | 5 +- tests/kraken.nf.test.snap | 34 +++++++++- tests/lib/UTILS.groovy | 54 +++++++++++++++ tests/mkfastq.nf.test | 5 +- tests/mkfastq.nf.test.snap | 37 ++++++++-- tests/sgdemux.nf.test | 5 +- tests/sgdemux.nf.test.snap | 60 +++++++++-------- tests/skip_tools.nf.test | 5 +- tests/skip_tools.nf.test.snap | 50 +++++++++----- tests/test_pe.nf.test | 5 +- tests/test_pe.nf.test.snap | 34 +++++++++- workflows/demultiplex.nf | 25 +------ 24 files changed, 477 insertions(+), 157 deletions(-) create mode 100644 tests/lib/UTILS.groovy diff --git a/conf/modules.config b/conf/modules.config index de264c67..7dc14397 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -215,6 +215,7 @@ process { ] ] } + withName: 'MULTIQC' { cache = false ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } @@ -231,6 +232,7 @@ process { mode: params.publish_dir_mode ] } + withName: SAMSHEE { ext.args = [ params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "", @@ -244,5 +246,12 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + + withName: 'FASTQ_TO_SAMPLESHEET*' { + publishDir = [ + path: { "${params.outdir}/samplesheet/" }, + mode: params.publish_dir_mode, + ] + } } diff --git a/modules/local/fastq_to_samplesheet/main.nf b/modules/local/fastq_to_samplesheet/main.nf index 5d6fb458..ef54bc65 100644 --- a/modules/local/fastq_to_samplesheet/main.nf +++ b/modules/local/fastq_to_samplesheet/main.nf @@ -5,7 +5,7 @@ process FASTQ_TO_SAMPLESHEET { memory 100.MB input: - val meta + val meta // Expecting a list of items val pipeline val strandedness @@ -13,39 +13,67 @@ process FASTQ_TO_SAMPLESHEET { tuple val(meta_clone), path("*samplesheet.csv"), emit: samplesheet exec: + // Initialize the samplesheet content + def samplesheetHeader = [] + def samplesheetRows = [] - // Calculate the dynamic output directory based on meta.lane - def outputDir = meta.publish_dir + // Sort meta by item.id + def sortedMeta = meta.sort { it.id } - // Add relevant fields to the map - def pipeline_map = [ - sample : meta.samplename, - fastq_1 : outputDir + '/' + file(meta.fastq_1).fileName - ] + // Collect all unique columns from all items and create rows + def allColumns = new LinkedHashSet() - // Add fastq_2 if it's a paired-end sample - if (!meta.single_end) { - pipeline_map.fastq_2 = outputDir + '/' + file(meta.fastq_2).fileName - } + sortedMeta.each { item -> + // Check for required keys in each item + if (!item.samplename) { + error "Item with id ${item.id} is missing the 'samplename' key." + } + if (!item.fastq_1) { + error "Item with id ${item.id} is missing the 'fastq_1' key." + } + + def pipeline_map = [:] // Initialize as an empty map + + // Prepare sample information + pipeline_map.sample = item.samplename + pipeline_map.fastq_1 = item.publish_dir + '/' + file(item.fastq_1).fileName + + // Add fastq_2 if it's a paired-end sample + if (!item.single_end && item.fastq_2) { + pipeline_map.fastq_2 = item.publish_dir + '/' + file(item.fastq_2).fileName ?: '' + } - // Add pipeline-specific entries - if (pipeline == 'rnaseq') { - pipeline_map << [ strandedness: strandedness ] - } else if (pipeline == 'atacseq') { - pipeline_map << [ replicate: 1 ] - } else if (pipeline == 'taxprofiler') { - pipeline_map << [ fasta: '' ] + // Add pipeline-specific entries + if (pipeline == 'rnaseq') { + pipeline_map.strandedness = strandedness ?: '' + } else if (pipeline == 'atacseq') { + pipeline_map.replicate = 1 + } else if (pipeline == 'taxprofiler') { + pipeline_map.fasta = '' + } + + // Add all keys to the set of unique columns + allColumns.addAll(pipeline_map.keySet()) + + // Prepare a row for the samplesheet, filling in missing values with empty strings + def rowValues = allColumns.collect { key -> + pipeline_map.containsKey(key) ? '"' + pipeline_map[key] + '"' : '""' + } + samplesheetRows << rowValues.join(",") } - // Create the samplesheet content - def samplesheet = pipeline_map.keySet().collect { '"' + it + '"' }.join(",") + '\n' - samplesheet += pipeline_map.values().collect { '"' + it + '"' }.join(",") + // Create a sorted list of headers + samplesheetHeader = allColumns.collect { '"' + it + '"' } + + // Create the complete samplesheet content + def samplesheet = samplesheetHeader.join(",") + '\n' + samplesheetRows.join("\n") // Write samplesheet to file - def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv") + def samplesheet_file = task.workDir.resolve("${pipeline}_samplesheet.csv") samplesheet_file.text = samplesheet - meta_clone = meta.clone() - meta_clone.remove('publishdir') + // Clone the first item in meta for output + meta_clone = meta.first().clone() + meta_clone.remove('publish_dir') // Removing the publish_dir just in case, although output channel is not used by other process } diff --git a/tests/bases2fastq.nf.test b/tests/bases2fastq.nf.test index 71de1afb..e917f639 100644 --- a/tests/bases2fastq.nf.test +++ b/tests/bases2fastq.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/bases2fastq.nf.test.snap b/tests/bases2fastq.nf.test.snap index 230a60e8..89f094eb 100644 --- a/tests/bases2fastq.nf.test.snap +++ b/tests/bases2fastq.nf.test.snap @@ -23,8 +23,6 @@ } }, [ - "fastq", - "fastq/DefaultSample.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/fastp-insert-size-plot.txt", @@ -211,6 +209,36 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-29T17:48:00.306378134" + "timestamp": "2024-10-30T15:25:52.803280321" + }, + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:25:55.693738747" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:25:54.286158931" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:25:57.12531458" } } \ No newline at end of file diff --git a/tests/bcl2fastq.nf.test b/tests/bcl2fastq.nf.test index ef69097b..cedbdedd 100644 --- a/tests/bcl2fastq.nf.test +++ b/tests/bcl2fastq.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/bcl2fastq.nf.test.snap b/tests/bcl2fastq.nf.test.snap index cdda06ef..a83672b1 100644 --- a/tests/bcl2fastq.nf.test.snap +++ b/tests/bcl2fastq.nf.test.snap @@ -1,4 +1,24 @@ { + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:27:18.557192574" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:27:13.631683141" + }, "Bcl2Fastq": { "content": [ 8, @@ -71,8 +91,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt", "220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt", "220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt", @@ -241,6 +259,16 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-17T23:30:33.296858681" + "timestamp": "2024-10-30T15:27:08.66031858" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:27:23.563785123" } } \ No newline at end of file diff --git a/tests/bclconvert.nf.test b/tests/bclconvert.nf.test index efdc72c1..0400e16e 100644 --- a/tests/bclconvert.nf.test +++ b/tests/bclconvert.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/bclconvert.nf.test.snap b/tests/bclconvert.nf.test.snap index 1339d5e3..bca8dab2 100644 --- a/tests/bclconvert.nf.test.snap +++ b/tests/bclconvert.nf.test.snap @@ -1,4 +1,14 @@ { + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:28:48.222041833" + }, "BCL-CONVERT": { "content": [ 8, @@ -53,8 +63,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html", "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt", "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bclconvert-lane-stats-table.txt", @@ -237,6 +245,26 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-17T23:31:58.719385372" + "timestamp": "2024-10-30T15:28:38.170386268" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:28:43.215827644" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:28:53.253703589" } } \ No newline at end of file diff --git a/tests/bclconvert_mini.nf.test b/tests/bclconvert_mini.nf.test index 19f24167..076d6a54 100644 --- a/tests/bclconvert_mini.nf.test +++ b/tests/bclconvert_mini.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/bclconvert_mini.nf.test.snap b/tests/bclconvert_mini.nf.test.snap index 8bdd4e5c..00e80ff4 100644 --- a/tests/bclconvert_mini.nf.test.snap +++ b/tests/bclconvert_mini.nf.test.snap @@ -1,7 +1,17 @@ { + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:33:18.621857802" + }, "BCL-CONVERT-mini": { "content": [ - 38, + 23, { "BCLCONVERT": { "bclconvert": "4.3.6" @@ -20,13 +30,6 @@ } }, [ - "fastq", - "fastq/HBRR1_S1_L001.samplesheet.csv", - "fastq/HBRR2_S2_L001.samplesheet.csv", - "fastq/HBRR3_S3_L001.samplesheet.csv", - "fastq/UHRR1_S4_L001.samplesheet.csv", - "fastq/UHRR2_S5_L001.samplesheet.csv", - "fastq/UHRR3_S6_L001.samplesheet.csv", "miniseq_truseq_smrna", "miniseq_truseq_smrna/HBRR1_S1_L001.fastp.fastq.gz", "miniseq_truseq_smrna/HBRR1_S1_L001.fastp.fastq.gz.md5", @@ -350,6 +353,26 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-17T23:34:50.670465424" + "timestamp": "2024-10-30T15:30:52.179362614" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:32:05.328537478" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:34:31.69965379" } } \ No newline at end of file diff --git a/tests/fqtk.nf.test b/tests/fqtk.nf.test index cd1841a1..9f41e885 100644 --- a/tests/fqtk.nf.test +++ b/tests/fqtk.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/fqtk.nf.test.snap b/tests/fqtk.nf.test.snap index 13c7507d..c4a582aa 100644 --- a/tests/fqtk.nf.test.snap +++ b/tests/fqtk.nf.test.snap @@ -1,7 +1,7 @@ { "FQTK": { "content": [ - 179, + 107, { "CSV2TSV": { "sed": 4.8 @@ -28,32 +28,6 @@ [ "csv2tsv", "csv2tsv/samplesheet.tsv", - "fastq", - "fastq/s1.samplesheet.csv", - "fastq/s10.samplesheet.csv", - "fastq/s11.samplesheet.csv", - "fastq/s12.samplesheet.csv", - "fastq/s13.samplesheet.csv", - "fastq/s14.samplesheet.csv", - "fastq/s15.samplesheet.csv", - "fastq/s16.samplesheet.csv", - "fastq/s17.samplesheet.csv", - "fastq/s18.samplesheet.csv", - "fastq/s19.samplesheet.csv", - "fastq/s2.samplesheet.csv", - "fastq/s20.samplesheet.csv", - "fastq/s21.samplesheet.csv", - "fastq/s22.samplesheet.csv", - "fastq/s23.samplesheet.csv", - "fastq/s24.samplesheet.csv", - "fastq/s3.samplesheet.csv", - "fastq/s4.samplesheet.csv", - "fastq/s5.samplesheet.csv", - "fastq/s6.samplesheet.csv", - "fastq/s7.samplesheet.csv", - "fastq/s8.samplesheet.csv", - "fastq/s9.samplesheet.csv", - "fastq/unmatched.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", @@ -832,6 +806,36 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-29T17:33:21.811056238" + "timestamp": "2024-10-30T15:37:06.039601101" + }, + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:37:09.37721135" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:37:07.737922949" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:37:11.05337654" } } \ No newline at end of file diff --git a/tests/kraken.nf.test b/tests/kraken.nf.test index ec4cead7..763df39b 100644 --- a/tests/kraken.nf.test +++ b/tests/kraken.nf.test @@ -36,7 +36,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/kraken.nf.test.snap b/tests/kraken.nf.test.snap index 7cdd3b53..f11ad54d 100644 --- a/tests/kraken.nf.test.snap +++ b/tests/kraken.nf.test.snap @@ -1,4 +1,14 @@ { + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:38:36.921696486" + }, "kraken2": { "content": [ 11, @@ -77,8 +87,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt", "220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt", "220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt", @@ -259,6 +267,26 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-17T23:39:05.363547947" + "timestamp": "2024-10-30T15:38:26.292866996" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:38:31.625158785" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:38:42.216897057" } } \ No newline at end of file diff --git a/tests/lib/UTILS.groovy b/tests/lib/UTILS.groovy new file mode 100644 index 00000000..2e31d28e --- /dev/null +++ b/tests/lib/UTILS.groovy @@ -0,0 +1,54 @@ +// Function to verify paths in downstream samplesheets + +import java.nio.file.* + + +class UTILS{ + public static boolean validateFastqPaths(Object csvFilePath) { + String path = csvFilePath.toString() // Convert GString or any other type to String + def csvFile = new File(path) + + if (!csvFile.exists()) { + throw new FileNotFoundException("CSV file not found at: $path") + } + + // Define allowed column names + def allowedColumns = ["sample", "fastq_1", "fastq_2", "strandedness", "replicate","fasta"] + + csvFile.withReader { reader -> + // Read the header and trim quotes + def header = reader.readLine().split(",").collect { it.replaceAll('"', '').trim() } + def fastq1Index = header.indexOf("fastq_1") + def fastq2Index = header.indexOf("fastq_2") + + // Check for the presence of 'fastq_1' column + if (fastq1Index == -1) { + throw new IllegalArgumentException("CSV file '$csvFile.name' does not contain a 'fastq_1' column.") + } + + // Check for valid column names + header.each { column -> + if (!allowedColumns.contains(column)) { + throw new IllegalArgumentException("Invalid column name: '$column' in downstream samplesheet file '$csvFile.name'. Allowed columns are: ${allowedColumns.join(', ')}.") + } + } + + reader.eachLine { line -> + def columns = line.split(",").collect { it.replaceAll('"', '').trim() } + def fastq1Path = columns[fastq1Index] + def fastq2Path = fastq2Index != -1 ? columns[fastq2Index] : null // Use null if fastq_2 is not present + + // Check if fastq_1 path is valid + if (!Files.exists(Paths.get(fastq1Path))) { + throw new FileNotFoundException("Incorrect R1 fastq file path: '$fastq1Path' in downstream samplesheet file '$csvFile.name'") // Raise error for fastq_1 + } + + // Check if fastq_2 path is valid if it exists + if (fastq2Path && !Files.exists(Paths.get(fastq2Path))) { + throw new FileNotFoundException("Incorrect R2 fastq file path: '$fastq2Path' in downstream samplesheet file '$csvFile.name'") // Raise error for fastq_2 + } + } + } + return true // All paths are valid if we reach this point + } +} diff --git a/tests/mkfastq.nf.test b/tests/mkfastq.nf.test index a9378098..9e94f851 100644 --- a/tests/mkfastq.nf.test +++ b/tests/mkfastq.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/mkfastq.nf.test.snap b/tests/mkfastq.nf.test.snap index 155a792f..0314b3dc 100644 --- a/tests/mkfastq.nf.test.snap +++ b/tests/mkfastq.nf.test.snap @@ -1,7 +1,7 @@ { "MKFASTQ": { "content": [ - 14, + 11, { "CELLRANGER_MKFASTQ": { "cellranger": "8.0.0" @@ -83,9 +83,6 @@ "cellranger-tiny-bcl-simple/L001/undetermined/Undetermined_S0_L001_I1_001.fastq.gz", "cellranger-tiny-bcl-simple/L001/undetermined/Undetermined_S0_L001_R1_001.fastq.gz", "cellranger-tiny-bcl-simple/L001/undetermined/Undetermined_S0_L001_R2_001.fastq.gz", - "fastq", - "fastq/test_sample_S1_L001.samplesheet.csv", - "fastq/test_sample_S1_L001_I1_001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", @@ -204,6 +201,36 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-17T23:40:20.21203835" + "timestamp": "2024-10-30T15:39:40.224337223" + }, + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:40:09.552438955" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:39:54.928437506" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:40:24.166541035" } } \ No newline at end of file diff --git a/tests/sgdemux.nf.test b/tests/sgdemux.nf.test index 7fa83e77..fdb03a0c 100644 --- a/tests/sgdemux.nf.test +++ b/tests/sgdemux.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/sgdemux.nf.test.snap b/tests/sgdemux.nf.test.snap index 941e5242..48fe96c9 100644 --- a/tests/sgdemux.nf.test.snap +++ b/tests/sgdemux.nf.test.snap @@ -1,7 +1,17 @@ { + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:42:08.871060618" + }, "SGDEMUX": { "content": [ - 178, + 106, { "FALCO": { "falco": "1.2.1" @@ -23,32 +33,6 @@ } }, [ - "fastq", - "fastq/Undetermined_S25_L001.samplesheet.csv", - "fastq/s10_S10_L001.samplesheet.csv", - "fastq/s11_S11_L001.samplesheet.csv", - "fastq/s12_S12_L001.samplesheet.csv", - "fastq/s13_S13_L001.samplesheet.csv", - "fastq/s14_S14_L001.samplesheet.csv", - "fastq/s15_S15_L001.samplesheet.csv", - "fastq/s16_S16_L001.samplesheet.csv", - "fastq/s17_S17_L001.samplesheet.csv", - "fastq/s18_S18_L001.samplesheet.csv", - "fastq/s19_S19_L001.samplesheet.csv", - "fastq/s1_S1_L001.samplesheet.csv", - "fastq/s20_S20_L001.samplesheet.csv", - "fastq/s21_S21_L001.samplesheet.csv", - "fastq/s22_S22_L001.samplesheet.csv", - "fastq/s23_S23_L001.samplesheet.csv", - "fastq/s24_S24_L001.samplesheet.csv", - "fastq/s2_S2_L001.samplesheet.csv", - "fastq/s3_S3_L001.samplesheet.csv", - "fastq/s4_S4_L001.samplesheet.csv", - "fastq/s5_S5_L001.samplesheet.csv", - "fastq/s6_S6_L001.samplesheet.csv", - "fastq/s7_S7_L001.samplesheet.csv", - "fastq/s8_S8_L001.samplesheet.csv", - "fastq/s9_S9_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", @@ -632,6 +616,26 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-29T17:49:16.207647882" + "timestamp": "2024-10-30T15:42:08.793174743" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:42:08.83444814" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:42:08.908513986" } } \ No newline at end of file diff --git a/tests/skip_tools.nf.test b/tests/skip_tools.nf.test index 938abc67..61cb4f9d 100644 --- a/tests/skip_tools.nf.test +++ b/tests/skip_tools.nf.test @@ -34,7 +34,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/skip_tools.nf.test.snap b/tests/skip_tools.nf.test.snap index 4a414e55..134ac029 100644 --- a/tests/skip_tools.nf.test.snap +++ b/tests/skip_tools.nf.test.snap @@ -47,8 +47,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html", "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt", "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bclconvert-lane-stats-table.txt", @@ -190,7 +188,17 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-18T00:25:42.942384924" + "timestamp": "2024-10-30T16:00:27.087776653" + }, + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:59:48.514552807" }, "Skip Trimming": { "content": [ @@ -245,8 +253,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html", "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt", "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bclconvert-lane-stats-table.txt", @@ -428,7 +434,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-18T00:24:59.739085162" + "timestamp": "2024-10-30T15:59:43.275844831" }, "Skip MultiQC": { "content": [ @@ -484,8 +490,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html", "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt", "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "pipeline_info", "pipeline_info/nf_core_pipeline_software_mqc_versions.yml", "samplesheet", @@ -527,7 +531,17 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-18T00:29:21.376431772" + "timestamp": "2024-10-30T16:03:37.651038824" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:59:45.903610848" }, "Skip Fastp & Fastqc": { "content": [ @@ -577,8 +591,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html", "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt", "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bclconvert-lane-stats-table.txt", @@ -720,7 +732,7 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-18T00:28:05.245862992" + "timestamp": "2024-10-30T16:02:29.281437154" }, "Skip Fastqc": { "content": [ @@ -776,8 +788,6 @@ "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html", "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt", "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bclconvert-lane-stats-table.txt", @@ -960,6 +970,16 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-18T00:27:11.29098668" + "timestamp": "2024-10-30T16:01:45.089730924" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:59:51.098330417" } } \ No newline at end of file diff --git a/tests/test_pe.nf.test b/tests/test_pe.nf.test index 9382ea56..48c9d4d3 100644 --- a/tests/test_pe.nf.test +++ b/tests/test_pe.nf.test @@ -30,7 +30,10 @@ nextflow_pipeline { stable_name, // All files with stable contents stable_path - ).match() } + ).match() }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") }, + { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") } ) } } diff --git a/tests/test_pe.nf.test.snap b/tests/test_pe.nf.test.snap index eaebb98b..f72e90a4 100644 --- a/tests/test_pe.nf.test.snap +++ b/tests/test_pe.nf.test.snap @@ -1,4 +1,24 @@ { + "rnaseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:49:50.982685279" + }, + "atacseq_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:49:40.878739318" + }, "Bcl2Fastq_pe": { "content": [ 9, @@ -71,8 +91,6 @@ "PE_Sample/L001/Stats/DemuxSummaryF1L1.txt", "PE_Sample/L001/Stats/FastqSummaryF1L1.txt", "PE_Sample/L001/Stats/Stats.json", - "fastq", - "fastq/Sample1_S1_L001.samplesheet.csv", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt", @@ -324,6 +342,16 @@ "nf-test": "0.9.0", "nextflow": "24.04.4" }, - "timestamp": "2024-10-18T00:32:00.49132579" + "timestamp": "2024-10-30T15:49:30.696307692" + }, + "taxprofiler_samplesheet": { + "content": [ + true + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-10-30T15:50:01.08084426" } } \ No newline at end of file diff --git a/workflows/demultiplex.nf b/workflows/demultiplex.nf index 3f4867da..0631ba67 100644 --- a/workflows/demultiplex.nf +++ b/workflows/demultiplex.nf @@ -282,32 +282,13 @@ workflow DEMULTIPLEX { // Module: FASTQ to samplesheet ch_meta_fastq_rnaseq = ch_meta_fastq - FASTQ_TO_SAMPLESHEET_RNASEQ(ch_meta_fastq_rnaseq, "rnaseq", strandedness) - FASTQ_TO_SAMPLESHEET_RNASEQ.out.samplesheet - .map { it[1] } - .collectFile(name:'tmp_rnaseq_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'rnaseq_samplesheet.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_samplesheet } + FASTQ_TO_SAMPLESHEET_RNASEQ(ch_meta_fastq_rnaseq.collect(), "rnaseq", strandedness) ch_meta_fastq_atacseq = ch_meta_fastq - FASTQ_TO_SAMPLESHEET_ATACSEQ(ch_meta_fastq_atacseq, "atacseq", strandedness) - FASTQ_TO_SAMPLESHEET_ATACSEQ.out.samplesheet - .map { it[1] } - .collectFile(name:'tmp_atac_seq_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'atacseq_samplesheet.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_samplesheet } + FASTQ_TO_SAMPLESHEET_ATACSEQ(ch_meta_fastq_atacseq.collect(), "atacseq", strandedness) ch_meta_fastq_taxprofiler = ch_meta_fastq - FASTQ_TO_SAMPLESHEET_TAXPROFILER(ch_meta_fastq_taxprofiler, "taxprofiler", strandedness) - FASTQ_TO_SAMPLESHEET_TAXPROFILER.out.samplesheet - .map { it[1] } - .collectFile(name:'tmp_taxprofiler_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'taxprofiler_samplesheet.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_samplesheet } - + FASTQ_TO_SAMPLESHEET_TAXPROFILER(ch_meta_fastq_taxprofiler.collect(), "taxprofiler", strandedness) // // Collate and save software versions //