Fixes in samplesheet generator script and snap updates

nf-core · Oct 30, 2024 · c53abee · c53abee
1 parent 833edde
commit c53abee
Show file tree

Hide file tree

Showing 24 changed files with 477 additions and 157 deletions.
diff --git a/conf/modules.config b/conf/modules.config
@@ -215,6 +215,7 @@ process {
             ]
         ]
     }
+
     withName: 'MULTIQC' {
         cache = false
         ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
@@ -231,6 +232,7 @@ process {
             mode: params.publish_dir_mode
         ]
     }
+
     withName: SAMSHEE {
         ext.args = [
             params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "",
@@ -244,5 +246,12 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
+    withName: 'FASTQ_TO_SAMPLESHEET*' {
+        publishDir = [
+            path: { "${params.outdir}/samplesheet/" },
+            mode: params.publish_dir_mode,
+        ]
+    }
 }
 
diff --git a/modules/local/fastq_to_samplesheet/main.nf b/modules/local/fastq_to_samplesheet/main.nf
@@ -5,47 +5,75 @@ process FASTQ_TO_SAMPLESHEET {
     memory 100.MB
 
     input:
-    val meta
+    val meta // Expecting a list of items
     val pipeline
     val strandedness
 
     output:
     tuple val(meta_clone), path("*samplesheet.csv"), emit: samplesheet
 
     exec:
+    // Initialize the samplesheet content
+    def samplesheetHeader = []
+    def samplesheetRows = []
 
-    // Calculate the dynamic output directory based on meta.lane
-    def outputDir = meta.publish_dir
+    // Sort meta by item.id
+    def sortedMeta = meta.sort { it.id }
 
-    // Add relevant fields to the map
-    def pipeline_map = [
-        sample  : meta.samplename,
-        fastq_1 : outputDir + '/' + file(meta.fastq_1).fileName
-    ]
+    // Collect all unique columns from all items and create rows
+    def allColumns = new LinkedHashSet()
 
-    // Add fastq_2 if it's a paired-end sample
-    if (!meta.single_end) {
-        pipeline_map.fastq_2 = outputDir + '/' + file(meta.fastq_2).fileName
-    }
+    sortedMeta.each { item ->
+        // Check for required keys in each item
+        if (!item.samplename) {
+            error "Item with id ${item.id} is missing the 'samplename' key."
+        }
+        if (!item.fastq_1) {
+            error "Item with id ${item.id} is missing the 'fastq_1' key."
+        }
+
+        def pipeline_map = [:] // Initialize as an empty map
+
+        // Prepare sample information
+        pipeline_map.sample = item.samplename
+        pipeline_map.fastq_1 = item.publish_dir + '/' + file(item.fastq_1).fileName
+
+        // Add fastq_2 if it's a paired-end sample
+        if (!item.single_end && item.fastq_2) {
+            pipeline_map.fastq_2 = item.publish_dir + '/' + file(item.fastq_2).fileName ?: ''
+        }
 
-    // Add pipeline-specific entries
-    if (pipeline == 'rnaseq') {
-        pipeline_map << [ strandedness: strandedness ]
-    } else if (pipeline == 'atacseq') {
-        pipeline_map << [ replicate: 1 ]
-    } else if (pipeline == 'taxprofiler') {
-        pipeline_map << [ fasta: '' ]
+        // Add pipeline-specific entries
+        if (pipeline == 'rnaseq') {
+            pipeline_map.strandedness = strandedness ?: ''
+        } else if (pipeline == 'atacseq') {
+            pipeline_map.replicate = 1
+        } else if (pipeline == 'taxprofiler') {
+            pipeline_map.fasta = ''
+        }
+
+        // Add all keys to the set of unique columns
+        allColumns.addAll(pipeline_map.keySet())
+
+        // Prepare a row for the samplesheet; columns collected so far that this item lacks are filled with empty strings
+        def rowValues = allColumns.collect { key ->
+            pipeline_map.containsKey(key) ? '"' + pipeline_map[key] + '"' : '""'
+        }
+        samplesheetRows << rowValues.join(",")
     }
 
-    // Create the samplesheet content
-    def samplesheet = pipeline_map.keySet().collect { '"' + it + '"' }.join(",") + '\n'
-    samplesheet += pipeline_map.values().collect { '"' + it + '"' }.join(",")
+    // Create the quoted header list from the collected columns (kept in insertion order, not sorted)
+    samplesheetHeader = allColumns.collect { '"' + it + '"' }
+
+    // Create the complete samplesheet content
+    def samplesheet = samplesheetHeader.join(",") + '\n' + samplesheetRows.join("\n")
 
     // Write samplesheet to file
-    def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv")
+    def samplesheet_file = task.workDir.resolve("${pipeline}_samplesheet.csv")
     samplesheet_file.text = samplesheet
 
-    meta_clone = meta.clone()
-    meta_clone.remove('publishdir')
+    // Clone the first item in meta for output
+    meta_clone = meta.first().clone()
+    meta_clone.remove('publish_dir') // Removing the publish_dir just in case, although output channel is not used by other process
 
 }
diff --git a/tests/bases2fastq.nf.test b/tests/bases2fastq.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bases2fastq.nf.test.snap b/tests/bases2fastq.nf.test.snap
@@ -23,8 +23,6 @@
                 }
             },
             [
-                "fastq",
-                "fastq/DefaultSample.samplesheet.csv",
                 "multiqc",
                 "multiqc/multiqc_data",
                 "multiqc/multiqc_data/fastp-insert-size-plot.txt",
@@ -211,6 +209,36 @@
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-29T17:48:00.306378134"
+        "timestamp": "2024-10-30T15:25:52.803280321"
+    },
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:25:55.693738747"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:25:54.286158931"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:25:57.12531458"
     }
 }
diff --git a/tests/bcl2fastq.nf.test b/tests/bcl2fastq.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bcl2fastq.nf.test.snap b/tests/bcl2fastq.nf.test.snap
@@ -1,4 +1,24 @@
 {
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:27:18.557192574"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:27:13.631683141"
+    },
     "Bcl2Fastq": {
         "content": [
             8,
@@ -71,8 +91,6 @@
                 "220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt",
                 "220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt",
                 "220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json",
-                "fastq",
-                "fastq/Sample1_S1_L001.samplesheet.csv",
                 "multiqc",
                 "multiqc/multiqc_data",
                 "multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt",
@@ -241,6 +259,16 @@
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-17T23:30:33.296858681"
+        "timestamp": "2024-10-30T15:27:08.66031858"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:27:23.563785123"
     }
 }
diff --git a/tests/bclconvert.nf.test b/tests/bclconvert.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bclconvert.nf.test.snap b/tests/bclconvert.nf.test.snap
@@ -1,4 +1,14 @@
 {
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:28:48.222041833"
+    },
     "BCL-CONVERT": {
         "content": [
             8,
@@ -53,8 +63,6 @@
                 "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html",
                 "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt",
                 "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz",
-                "fastq",
-                "fastq/Sample1_S1_L001.samplesheet.csv",
                 "multiqc",
                 "multiqc/multiqc_data",
                 "multiqc/multiqc_data/bclconvert-lane-stats-table.txt",
@@ -237,6 +245,26 @@
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-17T23:31:58.719385372"
+        "timestamp": "2024-10-30T15:28:38.170386268"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:28:43.215827644"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:28:53.253703589"
     }
 }
diff --git a/tests/bclconvert_mini.nf.test b/tests/bclconvert_mini.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bclconvert_mini.nf.test.snap b/tests/bclconvert_mini.nf.test.snap
@@ -1,7 +1,17 @@
 {
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:33:18.621857802"
+    },
     "BCL-CONVERT-mini": {
         "content": [
-            38,
+            23,
             {
                 "BCLCONVERT": {
                     "bclconvert": "4.3.6"
@@ -20,13 +30,6 @@
                 }
             },
             [
-                "fastq",
-                "fastq/HBRR1_S1_L001.samplesheet.csv",
-                "fastq/HBRR2_S2_L001.samplesheet.csv",
-                "fastq/HBRR3_S3_L001.samplesheet.csv",
-                "fastq/UHRR1_S4_L001.samplesheet.csv",
-                "fastq/UHRR2_S5_L001.samplesheet.csv",
-                "fastq/UHRR3_S6_L001.samplesheet.csv",
                 "miniseq_truseq_smrna",
                 "miniseq_truseq_smrna/HBRR1_S1_L001.fastp.fastq.gz",
                 "miniseq_truseq_smrna/HBRR1_S1_L001.fastp.fastq.gz.md5",
@@ -350,6 +353,26 @@
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-17T23:34:50.670465424"
+        "timestamp": "2024-10-30T15:30:52.179362614"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:32:05.328537478"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:34:31.69965379"
     }
 }