Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 272 #282

Merged
merged 6 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### `Fixed`

- [#277](https://github.com/nf-core/demultiplex/pull/277) Improved samplesheet generation to always produce all types of samplesheets, added the ability to explicitly set strandedness, and fixed output paths to correctly reflect the `publishDir` subdirectory structure.
- [#282](https://github.com/nf-core/demultiplex/pull/282) Fixed downstream samplesheet paths and `publishDir` config.

## 1.5.2 - 2024-10-07

Expand Down
15 changes: 12 additions & 3 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ process {
path: { "${params.outdir}/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "output/Samples/**/*.{json,fastq.gz}",
saveAs: { filename -> filename.minus("output/") }
saveAs: { filename -> filename.split("/")[-1] }
],
[
path: { "${params.outdir}/${meta.id}/" },
Expand Down Expand Up @@ -163,7 +163,7 @@ process {
[
path: { meta.lane ? "${params.outdir}/${meta.id}/L00${meta.lane}" : "${params.outdir}/${meta.id}" },
mode: params.publish_dir_mode,
pattern: "output/*.{tsv, fastq.gz}",
pattern: "output/*.{tsv,fastq.gz}",
saveAs: { filename -> filename.minus("output/") }
],
]
Expand All @@ -174,7 +174,7 @@ process {
[
path: { "${params.outdir}/${meta.id}/" },
mode: params.publish_dir_mode,
pattern: "output/*.{txt, fq.gz}",
pattern: "output/*.{txt,fq.gz}",
saveAs: { filename -> filename.minus("output/") }
],
]
Expand Down Expand Up @@ -215,6 +215,7 @@ process {
]
]
}

withName: 'MULTIQC' {
cache = false
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
Expand All @@ -231,6 +232,7 @@ process {
mode: params.publish_dir_mode
]
}

withName: SAMSHEE {
ext.args = [
params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "",
Expand All @@ -244,5 +246,12 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'FASTQ_TO_SAMPLESHEET*' {
publishDir = [
path: { "${params.outdir}/samplesheet/" },
mode: params.publish_dir_mode,
]
}
}

79 changes: 55 additions & 24 deletions modules/local/fastq_to_samplesheet/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,44 +5,75 @@ process FASTQ_TO_SAMPLESHEET {
memory 100.MB

input:
val meta
val meta // Expecting a list of items
val pipeline
val strandedness

output:
tuple val(meta), path("*samplesheet.csv"), emit: samplesheet
tuple val(meta_clone), path("*samplesheet.csv"), emit: samplesheet

exec:
// Initialize the samplesheet content
def samplesheetHeader = []
def samplesheetRows = []

// Calculate the dynamic output directory based on meta.lane
def outputDir = meta.lane ? "${params.outdir}/${meta.id}/L00${meta.lane}" : "${params.outdir}/${meta.id}"
// Sort meta by item.id
def sortedMeta = meta.sort { it.id }

// Add relevant fields to the map
def pipeline_map = [
sample : meta.samplename,
fastq_1 : outputDir + '/' + file(meta.fastq_1).fileName
]
// Collect all unique columns from all items and create rows
def allColumns = new LinkedHashSet()

// Add fastq_2 if it's a paired-end sample
if (!meta.single_end) {
pipeline_map.fastq_2 = outputDir + '/' + file(meta.fastq_2).fileName
}
sortedMeta.each { item ->
// Check for required keys in each item
if (!item.samplename) {
error "Item with id ${item.id} is missing the 'samplename' key."
}
if (!item.fastq_1) {
error "Item with id ${item.id} is missing the 'fastq_1' key."
}

def pipeline_map = [:] // Initialize as an empty map

// Prepare sample information
pipeline_map.sample = item.samplename
pipeline_map.fastq_1 = item.publish_dir + '/' + file(item.fastq_1).fileName

// Add fastq_2 if it's a paired-end sample
if (!item.single_end && item.fastq_2) {
pipeline_map.fastq_2 = item.publish_dir + '/' + file(item.fastq_2).fileName ?: ''
}

// Add pipeline-specific entries
if (pipeline == 'rnaseq') {
pipeline_map << [ strandedness: strandedness ]
} else if (pipeline == 'atacseq') {
pipeline_map << [ replicate: 1 ]
} else if (pipeline == 'taxprofiler') {
pipeline_map << [ fasta: '' ]
// Add pipeline-specific entries
if (pipeline == 'rnaseq') {
pipeline_map.strandedness = strandedness ?: ''
} else if (pipeline == 'atacseq') {
pipeline_map.replicate = 1
} else if (pipeline == 'taxprofiler') {
pipeline_map.fasta = ''
}

// Add all keys to the set of unique columns
allColumns.addAll(pipeline_map.keySet())

// Prepare a row for the samplesheet, filling in missing values with empty strings
def rowValues = allColumns.collect { key ->
pipeline_map.containsKey(key) ? '"' + pipeline_map[key] + '"' : '""'
}
samplesheetRows << rowValues.join(",")
}

// Create the samplesheet content
def samplesheet = pipeline_map.keySet().collect { '"' + it + '"' }.join(",") + '\n'
samplesheet += pipeline_map.values().collect { '"' + it + '"' }.join(",")
// Create a sorted list of headers
samplesheetHeader = allColumns.collect { '"' + it + '"' }

// Create the complete samplesheet content
def samplesheet = samplesheetHeader.join(",") + '\n' + samplesheetRows.join("\n")

// Write samplesheet to file
def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv")
def samplesheet_file = task.workDir.resolve("${pipeline}_samplesheet.csv")
samplesheet_file.text = samplesheet

// Clone the first item in meta for output
meta_clone = meta.first().clone()
meta_clone.remove('publish_dir') // Removing the publish_dir just in case, although output channel is not used by other process
Comment on lines +75 to +77
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't seem to be used anywhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This process is the last one that uses the samplesheet. We output both the samplesheet and the meta regardless, in case we need them for anything in the future. I added the `publish_dir` key to the meta map for easier handling of the paths, so I'm removing it here to restore the map to its original state.


}
3 changes: 2 additions & 1 deletion tests/.nftignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ sim-data/*.fastp.fastq.gz.md5
sim-data/*.fastp.fastq.gz_fastqc_data.txt
sim-data/*.fastp.fastq.gz_fastqc_report.html
sim-data/*.fastp.fastq.gz_summary.txt
sim-data/{Undetermined,s}*L001*.fastq.gz
sim-data/RunStats.json
sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_stats.json
sim-data/DefaultSample_stats.json
sim-data/most_frequent_unmatched.tsv
5 changes: 4 additions & 1 deletion tests/bases2fastq.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
47 changes: 36 additions & 11 deletions tests/bases2fastq.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
}
},
[
"fastq",
"fastq/DefaultSample.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/fastp-insert-size-plot.txt",
Expand Down Expand Up @@ -166,15 +164,12 @@
"sim-data/DefaultSample_2.fastp.fastq.gz_fastqc_data.txt",
"sim-data/DefaultSample_2.fastp.fastq.gz_fastqc_report.html",
"sim-data/DefaultSample_2.fastp.fastq.gz_summary.txt",
"sim-data/DefaultSample_R1.fastq.gz",
"sim-data/DefaultSample_R2.fastq.gz",
"sim-data/DefaultSample_stats.json",
"sim-data/Metrics.csv",
"sim-data/RunManifest.json",
"sim-data/RunStats.json",
"sim-data/Samples",
"sim-data/Samples/DefaultProject",
"sim-data/Samples/DefaultProject/DefaultSample",
"sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_R1.fastq.gz",
"sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_R2.fastq.gz",
"sim-data/Samples/DefaultProject/DefaultSample/DefaultSample_stats.json",
"sim-data/UnassignedSequences.csv"
],
[
Expand Down Expand Up @@ -203,17 +198,47 @@
"multiqc_citations.txt:md5,d35df50e9903a96a2b3bce3c1fbc8ad2",
"sim-data.csv:md5,1d1dab697bd88c411fff89c102024295",
"DefaultSample.fastp.json:md5,9c10bbfbc4beb9523ca3a114f95709db",
"Metrics.csv:md5,ad7af77573845924062e2554cc641b16",
"RunManifest.json:md5,cdb8051690645d0f070191286323f1c8",
"DefaultSample_R1.fastq.gz:md5,6c73b924e4eb53b716614aa6fd8b791a",
"DefaultSample_R2.fastq.gz:md5,54e2c0894844e42605b7cd567b1c7dbe",
"Metrics.csv:md5,ad7af77573845924062e2554cc641b16",
"RunManifest.json:md5,cdb8051690645d0f070191286323f1c8",
"UnassignedSequences.csv:md5,11c1693830ce941b8cfb8d2431a59097"
]
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:29:10.785062467"
"timestamp": "2024-10-30T15:25:52.803280321"
},
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:55.693738747"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:54.286158931"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:57.12531458"
}
}
5 changes: 4 additions & 1 deletion tests/bcl2fastq.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bcl2fastq.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:18.557192574"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:13.631683141"
},
"Bcl2Fastq": {
"content": [
8,
Expand Down Expand Up @@ -71,8 +91,6 @@
"220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt",
"220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt",
"220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json",
"fastq",
"fastq/Sample1_S1_L001.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt",
Expand Down Expand Up @@ -241,6 +259,16 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:30:33.296858681"
"timestamp": "2024-10-30T15:27:08.66031858"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:23.563785123"
}
}
5 changes: 4 additions & 1 deletion tests/bclconvert.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bclconvert.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:48.222041833"
},
"BCL-CONVERT": {
"content": [
8,
Expand Down Expand Up @@ -53,8 +63,6 @@
"220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html",
"220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt",
"220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz",
"fastq",
"fastq/Sample1_S1_L001.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/bclconvert-lane-stats-table.txt",
Expand Down Expand Up @@ -237,6 +245,26 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:31:58.719385372"
"timestamp": "2024-10-30T15:28:38.170386268"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:43.215827644"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:53.253703589"
}
}
Loading
Loading