Skip to content

Commit

Permalink
Merge pull request #282 from nf-core/issue_272
Browse files Browse the repository at this point in the history
Issue 272
  • Loading branch information
nschcolnicov authored Oct 31, 2024
2 parents 9e11c07 + 1df1a08 commit 74b1869
Show file tree
Hide file tree
Showing 26 changed files with 643 additions and 171 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`

- [#277](https://github.com/nf-core/demultiplex/pull/277) Improved samplesheet generation to always produce all types of samplesheets, added the ability to explicitly set strandedness, and fixed output paths to correctly reflect the `publishDir` subdirectory structure.
- [#282](https://github.com/nf-core/demultiplex/pull/282) Fixed downstream samplesheet paths and `publishDir` config.

## 1.5.2 - 2024-10-07

Expand Down
15 changes: 12 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ process {
path: { "${params.outdir}/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "output/Samples/**/*.{json,fastq.gz}",
saveAs: { filename -> filename.minus("output/") }
saveAs: { filename -> filename.split("/")[-1] }
],
[
path: { "${params.outdir}/${meta.id}/" },
Expand Down Expand Up @@ -163,7 +163,7 @@ process {
[
path: { meta.lane ? "${params.outdir}/${meta.id}/L00${meta.lane}" : "${params.outdir}/${meta.id}" },
mode: params.publish_dir_mode,
pattern: "output/*.{tsv, fastq.gz}",
pattern: "output/*.{tsv,fastq.gz}",
saveAs: { filename -> filename.minus("output/") }
],
]
Expand All @@ -174,7 +174,7 @@ process {
[
path: { "${params.outdir}/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "output/*.{txt, fq.gz}",
pattern: "output/*.{txt,fq.gz}",
saveAs: { filename -> filename.minus("output/") }
],
]
Expand Down Expand Up @@ -215,6 +215,7 @@ process {
]
]
}

withName: 'MULTIQC' {
cache = false
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
Expand All @@ -231,6 +232,7 @@ process {
mode: params.publish_dir_mode
]
}

withName: SAMSHEE {
ext.args = [
params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "",
Expand All @@ -244,5 +246,12 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'FASTQ_TO_SAMPLESHEET*' {
publishDir = [
path: { "${params.outdir}/samplesheet/" },
mode: params.publish_dir_mode,
]
}
}

79 changes: 55 additions & 24 deletions modules/local/fastq_to_samplesheet/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,75 @@ process FASTQ_TO_SAMPLESHEET {
memory 100.MB

input:
val meta
val meta // Expecting a list of items
val pipeline
val strandedness

output:
tuple val(meta), path("*samplesheet.csv"), emit: samplesheet
tuple val(meta_clone), path("*samplesheet.csv"), emit: samplesheet

exec:
// Initialize the samplesheet content
def samplesheetHeader = []
def samplesheetRows = []

// Calculate the dynamic output directory based on meta.lane
def outputDir = meta.lane ? "${params.outdir}/${meta.id}/L00${meta.lane}" : "${params.outdir}/${meta.id}"
// Sort meta by item.id
def sortedMeta = meta.sort { it.id }

// Add relevant fields to the map
def pipeline_map = [
sample : meta.samplename,
fastq_1 : outputDir + '/' + file(meta.fastq_1).fileName
]
// Collect all unique columns from all items and create rows
def allColumns = new LinkedHashSet()

// Add fastq_2 if it's a paired-end sample
if (!meta.single_end) {
pipeline_map.fastq_2 = outputDir + '/' + file(meta.fastq_2).fileName
}
sortedMeta.each { item ->
// Check for required keys in each item
if (!item.samplename) {
error "Item with id ${item.id} is missing the 'samplename' key."
}
if (!item.fastq_1) {
error "Item with id ${item.id} is missing the 'fastq_1' key."
}

def pipeline_map = [:] // Initialize as an empty map

// Prepare sample information
pipeline_map.sample = item.samplename
pipeline_map.fastq_1 = item.publish_dir + '/' + file(item.fastq_1).fileName

// Add fastq_2 if it's a paired-end sample
if (!item.single_end && item.fastq_2) {
pipeline_map.fastq_2 = item.publish_dir + '/' + file(item.fastq_2).fileName ?: ''
}

// Add pipeline-specific entries
if (pipeline == 'rnaseq') {
pipeline_map << [ strandedness: strandedness ]
} else if (pipeline == 'atacseq') {
pipeline_map << [ replicate: 1 ]
} else if (pipeline == 'taxprofiler') {
pipeline_map << [ fasta: '' ]
// Add pipeline-specific entries
if (pipeline == 'rnaseq') {
pipeline_map.strandedness = strandedness ?: ''
} else if (pipeline == 'atacseq') {
pipeline_map.replicate = 1
} else if (pipeline == 'taxprofiler') {
pipeline_map.fasta = ''
}

// Add all keys to the set of unique columns
allColumns.addAll(pipeline_map.keySet())

// Prepare a row for the samplesheet, filling in missing values with empty strings
def rowValues = allColumns.collect { key ->
pipeline_map.containsKey(key) ? '"' + pipeline_map[key] + '"' : '""'
}
samplesheetRows << rowValues.join(",")
}

// Create the samplesheet content
def samplesheet = pipeline_map.keySet().collect { '"' + it + '"' }.join(",") + '\n'
samplesheet += pipeline_map.values().collect { '"' + it + '"' }.join(",")
// Create the quoted header list (columns stay in first-seen insertion order; no sorting is applied)
samplesheetHeader = allColumns.collect { '"' + it + '"' }

// Create the complete samplesheet content
def samplesheet = samplesheetHeader.join(",") + '\n' + samplesheetRows.join("\n")

// Write samplesheet to file
def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv")
def samplesheet_file = task.workDir.resolve("${pipeline}_samplesheet.csv")
samplesheet_file.text = samplesheet

// Clone the first item in meta for output
meta_clone = meta.first().clone()
meta_clone.remove('publish_dir') // Removing the publish_dir just in case, although output channel is not used by other process

}
3 changes: 2 additions & 1 deletion tests/.nftignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sim-data/*.fastp.fastq.gz.md5
sim-data/*.fastp.fastq.gz_fastqc_data.txt
sim-data/*.fastp.fastq.gz_fastqc_report.html
sim-data/*.fastp.fastq.gz_summary.txt
sim-data/{Undetermined,s}*L001*.fastq.gz
sim-data/RunStats.json
sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_stats.json
sim-data/DefaultSample_stats.json
sim-data/most_frequent_unmatched.tsv
5 changes: 4 additions & 1 deletion tests/bases2fastq.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
47 changes: 36 additions & 11 deletions tests/bases2fastq.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
}
},
[
"fastq",
"fastq/DefaultSample.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/fastp-insert-size-plot.txt",
Expand Down Expand Up @@ -166,15 +164,12 @@
"sim-data/DefaultSample_2.fastp.fastq.gz_fastqc_data.txt",
"sim-data/DefaultSample_2.fastp.fastq.gz_fastqc_report.html",
"sim-data/DefaultSample_2.fastp.fastq.gz_summary.txt",
"sim-data/DefaultSample_R1.fastq.gz",
"sim-data/DefaultSample_R2.fastq.gz",
"sim-data/DefaultSample_stats.json",
"sim-data/Metrics.csv",
"sim-data/RunManifest.json",
"sim-data/RunStats.json",
"sim-data/Samples",
"sim-data/Samples/DefaultProject",
"sim-data/Samples/DefaultProject/DefaultSample",
"sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_R1.fastq.gz",
"sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_R2.fastq.gz",
"sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_stats.json",
"sim-data/UnassignedSequences.csv"
],
[
Expand Down Expand Up @@ -203,17 +198,47 @@
"multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2",
"sim-data.csv:md5,1d1dab697bd88c411fff89c102024295",
"DefaultSample.fastp.json:md5,9c10bbfbc4beb9523ca3a114f95709db",
"Metrics.csv:md5,ad7af77573845924062e2554cc641b16",
"RunManifest.json:md5,cdb8051690645d0f070191286323f1c8",
"DefaultSample_R1.fastq.gz:md5,6c73b924e4eb53b716614aa6fd8b791a",
"DefaultSample_R2.fastq.gz:md5,54e2c0894844e42605b7cd567b1c7dbe",
"Metrics.csv:md5,ad7af77573845924062e2554cc641b16",
"RunManifest.json:md5,cdb8051690645d0f070191286323f1c8",
"UnassignedSequences.csv:md5,11c1693830ce941b8cfb8d2431a59097"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:29:10.785062467"
"timestamp": "2024-10-30T15:25:52.803280321"
},
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:55.693738747"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:54.286158931"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:57.12531458"
}
}
5 changes: 4 additions & 1 deletion tests/bcl2fastq.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bcl2fastq.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:18.557192574"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:13.631683141"
},
"Bcl2Fastq": {
"content": [
8,
Expand Down Expand Up @@ -71,8 +91,6 @@
"220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt",
"220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt",
"220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json",
"fastq",
"fastq/Sample1_S1_L001.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt",
Expand Down Expand Up @@ -241,6 +259,16 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:30:33.296858681"
"timestamp": "2024-10-30T15:27:08.66031858"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:23.563785123"
}
}
5 changes: 4 additions & 1 deletion tests/bclconvert.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bclconvert.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:48.222041833"
},
"BCL-CONVERT": {
"content": [
8,
Expand Down Expand Up @@ -53,8 +63,6 @@
"220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html",
"220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt",
"220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz",
"fastq",
"fastq/Sample1_S1_L001.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/bclconvert-lane-stats-table.txt",
Expand Down Expand Up @@ -237,6 +245,26 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:31:58.719385372"
"timestamp": "2024-10-30T15:28:38.170386268"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:43.215827644"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:53.253703589"
}
}
Loading

0 comments on commit 74b1869

Please sign in to comment.