Merge pull request #282 from nf-core/issue_272

Issue 272
nf-core · Oct 31, 2024 · 74b1869 · 74b1869
2 parents 9e11c07 + 1df1a08
commit 74b1869
Show file tree

Hide file tree

Showing 26 changed files with 643 additions and 171 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Fixed`
 
 - [#277](https://github.com/nf-core/demultiplex/pull/277) Improved samplesheet generation to always produce all types of samplesheets, added the ability to explicitly set strandedness, and fixed output paths to correctly reflect the `publishDir` subdirectory structure.
+- [#282](https://github.com/nf-core/demultiplex/pull/282) Fixed downstream samplesheet paths and `publishDir` config.
 
 ## 1.5.2 - 2024-10-07
 

diff --git a/conf/modules.config b/conf/modules.config
@@ -97,7 +97,7 @@ process {
                 path: { "${params.outdir}/${meta.id}/" },
                 mode: params.publish_dir_mode,
                 pattern: "output/Samples/**/*.{json,fastq.gz}",
-                saveAs: { filename -> filename.minus("output/") }
+                saveAs: { filename -> filename.split("/")[-1] }
             ],
             [
                 path: { "${params.outdir}/${meta.id}/" },
@@ -163,7 +163,7 @@ process {
             [
                 path: { meta.lane ? "${params.outdir}/${meta.id}/L00${meta.lane}" : "${params.outdir}/${meta.id}" },
                 mode: params.publish_dir_mode,
-                pattern: "output/*.{tsv, fastq.gz}",
+                pattern: "output/*.{tsv,fastq.gz}",
                 saveAs: { filename -> filename.minus("output/") }
             ],
         ]
@@ -174,7 +174,7 @@ process {
             [
                 path: { "${params.outdir}/${meta.id}/" },
                 mode: params.publish_dir_mode,
-                pattern: "output/*.{txt, fq.gz}",
+                pattern: "output/*.{txt,fq.gz}",
                 saveAs: { filename -> filename.minus("output/") }
             ],
         ]
@@ -215,6 +215,7 @@ process {
             ]
         ]
     }
+
     withName: 'MULTIQC' {
         cache = false
         ext.args   = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
@@ -231,6 +232,7 @@ process {
             mode: params.publish_dir_mode
         ]
     }
+
     withName: SAMSHEE {
         ext.args = [
             params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "",
@@ -244,5 +246,12 @@ process {
             saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
         ]
     }
+
+    withName: 'FASTQ_TO_SAMPLESHEET*' {
+        publishDir = [
+            path: { "${params.outdir}/samplesheet/" },
+            mode: params.publish_dir_mode,
+        ]
+    }
 }
 
diff --git a/modules/local/fastq_to_samplesheet/main.nf b/modules/local/fastq_to_samplesheet/main.nf
@@ -5,44 +5,75 @@ process FASTQ_TO_SAMPLESHEET {
     memory 100.MB
 
     input:
-    val meta
+    val meta // Expecting a list of items
     val pipeline
     val strandedness
 
     output:
-    tuple val(meta), path("*samplesheet.csv"), emit: samplesheet
+    tuple val(meta_clone), path("*samplesheet.csv"), emit: samplesheet
 
     exec:
+    // Initialize the samplesheet content
+    def samplesheetHeader = []
+    def samplesheetRows = []
 
-    // Calculate the dynamic output directory based on meta.lane
-    def outputDir = meta.lane ? "${params.outdir}/${meta.id}/L00${meta.lane}" : "${params.outdir}/${meta.id}"
+    // Sort meta by item.id
+    def sortedMeta = meta.sort { it.id }
 
-    // Add relevant fields to the map
-    def pipeline_map = [
-        sample  : meta.samplename,
-        fastq_1 : outputDir + '/' + file(meta.fastq_1).fileName
-    ]
+    // Collect all unique columns from all items and create rows
+    def allColumns = new LinkedHashSet()
 
-    // Add fastq_2 if it's a paired-end sample
-    if (!meta.single_end) {
-        pipeline_map.fastq_2 = outputDir + '/' + file(meta.fastq_2).fileName
-    }
+    sortedMeta.each { item ->
+        // Check for required keys in each item
+        if (!item.samplename) {
+            error "Item with id ${item.id} is missing the 'samplename' key."
+        }
+        if (!item.fastq_1) {
+            error "Item with id ${item.id} is missing the 'fastq_1' key."
+        }
+
+        def pipeline_map = [:] // Initialize as an empty map
+
+        // Prepare sample information
+        pipeline_map.sample = item.samplename
+        pipeline_map.fastq_1 = item.publish_dir + '/' + file(item.fastq_1).fileName
+
+        // Add fastq_2 if it's a paired-end sample
+        if (!item.single_end && item.fastq_2) {
+            pipeline_map.fastq_2 = item.publish_dir + '/' + file(item.fastq_2).fileName ?: ''
+        }
 
-    // Add pipeline-specific entries
-    if (pipeline == 'rnaseq') {
-        pipeline_map << [ strandedness: strandedness ]
-    } else if (pipeline == 'atacseq') {
-        pipeline_map << [ replicate: 1 ]
-    } else if (pipeline == 'taxprofiler') {
-        pipeline_map << [ fasta: '' ]
+        // Add pipeline-specific entries
+        if (pipeline == 'rnaseq') {
+            pipeline_map.strandedness = strandedness ?: ''
+        } else if (pipeline == 'atacseq') {
+            pipeline_map.replicate = 1
+        } else if (pipeline == 'taxprofiler') {
+            pipeline_map.fasta = ''
+        }
+
+        // Add all keys to the set of unique columns
+        allColumns.addAll(pipeline_map.keySet())
+
+        // Prepare a row for the samplesheet, filling in missing values with empty strings
+        def rowValues = allColumns.collect { key ->
+            pipeline_map.containsKey(key) ? '"' + pipeline_map[key] + '"' : '""'
+        }
+        samplesheetRows << rowValues.join(",")
     }
 
-    // Create the samplesheet content
-    def samplesheet = pipeline_map.keySet().collect { '"' + it + '"' }.join(",") + '\n'
-    samplesheet += pipeline_map.values().collect { '"' + it + '"' }.join(",")
+    // Build the quoted header row; columns keep first-seen (insertion) order, they are not sorted
+    samplesheetHeader = allColumns.collect { '"' + it + '"' }
+
+    // Create the complete samplesheet content
+    def samplesheet = samplesheetHeader.join(",") + '\n' + samplesheetRows.join("\n")
 
     // Write samplesheet to file
-    def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv")
+    def samplesheet_file = task.workDir.resolve("${pipeline}_samplesheet.csv")
     samplesheet_file.text = samplesheet
 
+    // Clone the first item in meta for output
+    meta_clone = meta.first().clone()
+    meta_clone.remove('publish_dir') // Removing the publish_dir just in case, although output channel is not used by other process
+
 }
diff --git a/tests/.nftignore b/tests/.nftignore
@@ -29,6 +29,7 @@ sim-data/*.fastp.fastq.gz.md5
 sim-data/*.fastp.fastq.gz_fastqc_data.txt
 sim-data/*.fastp.fastq.gz_fastqc_report.html
 sim-data/*.fastp.fastq.gz_summary.txt
+sim-data/{Undetermined,s}*L001*.fastq.gz
 sim-data/RunStats.json
-sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_stats.json
+sim-data/DefaultSample_stats.json
 sim-data/most_frequent_unmatched.tsv
diff --git a/tests/bases2fastq.nf.test b/tests/bases2fastq.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bases2fastq.nf.test.snap b/tests/bases2fastq.nf.test.snap
@@ -23,8 +23,6 @@
                 }
             },
             [
-                "fastq",
-                "fastq/DefaultSample.samplesheet.csv",
                 "multiqc",
                 "multiqc/multiqc_data",
                 "multiqc/multiqc_data/fastp-insert-size-plot.txt",
@@ -166,15 +164,12 @@
                 "sim-data/DefaultSample_2.fastp.fastq.gz_fastqc_data.txt",
                 "sim-data/DefaultSample_2.fastp.fastq.gz_fastqc_report.html",
                 "sim-data/DefaultSample_2.fastp.fastq.gz_summary.txt",
+                "sim-data/DefaultSample_R1.fastq.gz",
+                "sim-data/DefaultSample_R2.fastq.gz",
+                "sim-data/DefaultSample_stats.json",
                 "sim-data/Metrics.csv",
                 "sim-data/RunManifest.json",
                 "sim-data/RunStats.json",
-                "sim-data/Samples",
-                "sim-data/Samples/DefaultProject",
-                "sim-data/Samples/DefaultProject/DefaultSample",
-                "sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_R1.fastq.gz",
-                "sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_R2.fastq.gz",
-                "sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_stats.json",
                 "sim-data/UnassignedSequences.csv"
             ],
             [
@@ -203,17 +198,47 @@
                 "multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2",
                 "sim-data.csv:md5,1d1dab697bd88c411fff89c102024295",
                 "DefaultSample.fastp.json:md5,9c10bbfbc4beb9523ca3a114f95709db",
-                "Metrics.csv:md5,ad7af77573845924062e2554cc641b16",
-                "RunManifest.json:md5,cdb8051690645d0f070191286323f1c8",
                 "DefaultSample_R1.fastq.gz:md5,6c73b924e4eb53b716614aa6fd8b791a",
                 "DefaultSample_R2.fastq.gz:md5,54e2c0894844e42605b7cd567b1c7dbe",
+                "Metrics.csv:md5,ad7af77573845924062e2554cc641b16",
+                "RunManifest.json:md5,cdb8051690645d0f070191286323f1c8",
                 "UnassignedSequences.csv:md5,11c1693830ce941b8cfb8d2431a59097"
             ]
         ],
         "meta": {
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-17T23:29:10.785062467"
+        "timestamp": "2024-10-30T15:25:52.803280321"
+    },
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:25:55.693738747"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:25:54.286158931"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:25:57.12531458"
     }
 }
diff --git a/tests/bcl2fastq.nf.test b/tests/bcl2fastq.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bcl2fastq.nf.test.snap b/tests/bcl2fastq.nf.test.snap
@@ -1,4 +1,24 @@
 {
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:27:18.557192574"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:27:13.631683141"
+    },
     "Bcl2Fastq": {
         "content": [
             8,
@@ -71,8 +91,6 @@
                 "220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt",
                 "220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt",
                 "220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json",
-                "fastq",
-                "fastq/Sample1_S1_L001.samplesheet.csv",
                 "multiqc",
                 "multiqc/multiqc_data",
                 "multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt",
@@ -241,6 +259,16 @@
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-17T23:30:33.296858681"
+        "timestamp": "2024-10-30T15:27:08.66031858"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:27:23.563785123"
     }
 }
diff --git a/tests/bclconvert.nf.test b/tests/bclconvert.nf.test
@@ -30,7 +30,10 @@ nextflow_pipeline {
                     stable_name,
                     // All files with stable contents
                     stable_path
-                ).match() }
+                ).match() },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
+                { assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
             )
         }
     }

diff --git a/tests/bclconvert.nf.test.snap b/tests/bclconvert.nf.test.snap
@@ -1,4 +1,14 @@
 {
+    "rnaseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:28:48.222041833"
+    },
     "BCL-CONVERT": {
         "content": [
             8,
@@ -53,8 +63,6 @@
                 "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html",
                 "220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt",
                 "220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz",
-                "fastq",
-                "fastq/Sample1_S1_L001.samplesheet.csv",
                 "multiqc",
                 "multiqc/multiqc_data",
                 "multiqc/multiqc_data/bclconvert-lane-stats-table.txt",
@@ -237,6 +245,26 @@
             "nf-test": "0.9.0",
             "nextflow": "24.04.4"
         },
-        "timestamp": "2024-10-17T23:31:58.719385372"
+        "timestamp": "2024-10-30T15:28:38.170386268"
+    },
+    "atacseq_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:28:43.215827644"
+    },
+    "taxprofiler_samplesheet": {
+        "content": [
+            true
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-10-30T15:28:53.253703589"
     }
 }