Skip to content

Commit

Permalink
Fixes in samplesheet generator script and snap updates
Browse files: browse the repository at this point in the history
  • Loading branch information
nschcolnicov committed Oct 30, 2024
1 parent 833edde commit c53abee
Show file tree
Hide file tree
Showing 24 changed files with 477 additions and 157 deletions.
9 changes: 9 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ process {
]
]
}

withName: 'MULTIQC' {
cache = false
ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' }
Expand All @@ -231,6 +232,7 @@ process {
mode: params.publish_dir_mode
]
}

withName: SAMSHEE {
ext.args = [
params.json_schema_validator ? "--schema '${params.json_schema_validator}'" : "",
Expand All @@ -244,5 +246,12 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'FASTQ_TO_SAMPLESHEET*' {
publishDir = [
path: { "${params.outdir}/samplesheet/" },
mode: params.publish_dir_mode,
]
}
}

78 changes: 53 additions & 25 deletions modules/local/fastq_to_samplesheet/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,75 @@ process FASTQ_TO_SAMPLESHEET {
memory 100.MB

input:
val meta
val meta // Expecting a list of items
val pipeline
val strandedness

output:
tuple val(meta_clone), path("*samplesheet.csv"), emit: samplesheet

exec:
// Initialize the samplesheet content
def samplesheetHeader = []
def samplesheetRows = []

// Use the output directory supplied in meta.publish_dir
def outputDir = meta.publish_dir
// Sort meta by item.id
def sortedMeta = meta.sort { it.id }

// Add relevant fields to the map
def pipeline_map = [
sample : meta.samplename,
fastq_1 : outputDir + '/' + file(meta.fastq_1).fileName
]
// Collect all unique columns from all items and create rows
def allColumns = new LinkedHashSet()

// Add fastq_2 if it's a paired-end sample
if (!meta.single_end) {
pipeline_map.fastq_2 = outputDir + '/' + file(meta.fastq_2).fileName
}
sortedMeta.each { item ->
// Check for required keys in each item
if (!item.samplename) {
error "Item with id ${item.id} is missing the 'samplename' key."
}
if (!item.fastq_1) {
error "Item with id ${item.id} is missing the 'fastq_1' key."
}

def pipeline_map = [:] // Initialize as an empty map

// Prepare sample information
pipeline_map.sample = item.samplename
pipeline_map.fastq_1 = item.publish_dir + '/' + file(item.fastq_1).fileName

// Add fastq_2 if it's a paired-end sample
if (!item.single_end && item.fastq_2) {
pipeline_map.fastq_2 = item.publish_dir + '/' + file(item.fastq_2).fileName ?: ''
}

// Add pipeline-specific entries
if (pipeline == 'rnaseq') {
pipeline_map << [ strandedness: strandedness ]
} else if (pipeline == 'atacseq') {
pipeline_map << [ replicate: 1 ]
} else if (pipeline == 'taxprofiler') {
pipeline_map << [ fasta: '' ]
// Add pipeline-specific entries
if (pipeline == 'rnaseq') {
pipeline_map.strandedness = strandedness ?: ''
} else if (pipeline == 'atacseq') {
pipeline_map.replicate = 1
} else if (pipeline == 'taxprofiler') {
pipeline_map.fasta = ''
}

// Add all keys to the set of unique columns
allColumns.addAll(pipeline_map.keySet())

// Prepare a row for the samplesheet, filling in missing values with empty strings
def rowValues = allColumns.collect { key ->
pipeline_map.containsKey(key) ? '"' + pipeline_map[key] + '"' : '""'
}
samplesheetRows << rowValues.join(",")
}

// Create the samplesheet content
def samplesheet = pipeline_map.keySet().collect { '"' + it + '"' }.join(",") + '\n'
samplesheet += pipeline_map.values().collect { '"' + it + '"' }.join(",")
// Build the quoted header row from the collected columns (insertion order, not sorted)
samplesheetHeader = allColumns.collect { '"' + it + '"' }

// Create the complete samplesheet content
def samplesheet = samplesheetHeader.join(",") + '\n' + samplesheetRows.join("\n")

// Write samplesheet to file
def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv")
def samplesheet_file = task.workDir.resolve("${pipeline}_samplesheet.csv")
samplesheet_file.text = samplesheet

meta_clone = meta.clone()
meta_clone.remove('publishdir')
// Clone the first item in meta for output
meta_clone = meta.first().clone()
meta_clone.remove('publish_dir') // Remove publish_dir as a precaution, although the output channel is not used by any other process

}
5 changes: 4 additions & 1 deletion tests/bases2fastq.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bases2fastq.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
}
},
[
"fastq",
"fastq/DefaultSample.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/fastp-insert-size-plot.txt",
Expand Down Expand Up @@ -211,6 +209,36 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-29T17:48:00.306378134"
"timestamp": "2024-10-30T15:25:52.803280321"
},
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:55.693738747"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:54.286158931"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:25:57.12531458"
}
}
5 changes: 4 additions & 1 deletion tests/bcl2fastq.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bcl2fastq.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,24 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:18.557192574"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:13.631683141"
},
"Bcl2Fastq": {
"content": [
8,
Expand Down Expand Up @@ -71,8 +91,6 @@
"220422_M11111_0222_000000000-K9H97/L001/Stats/DemuxSummaryF1L1.txt",
"220422_M11111_0222_000000000-K9H97/L001/Stats/FastqSummaryF1L1.txt",
"220422_M11111_0222_000000000-K9H97/L001/Stats/Stats.json",
"fastq",
"fastq/Sample1_S1_L001.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/bcl2fastq-lane-stats-table.txt",
Expand Down Expand Up @@ -241,6 +259,16 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:30:33.296858681"
"timestamp": "2024-10-30T15:27:08.66031858"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:27:23.563785123"
}
}
5 changes: 4 additions & 1 deletion tests/bclconvert.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
34 changes: 31 additions & 3 deletions tests/bclconvert.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:48.222041833"
},
"BCL-CONVERT": {
"content": [
8,
Expand Down Expand Up @@ -53,8 +63,6 @@
"220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_report.html",
"220422_M11111_0222_000000000-K9H97/L001/Sample1_S1_L001_summary.txt",
"220422_M11111_0222_000000000-K9H97/L001/Undetermined_S0_L001_R1_001.fastq.gz",
"fastq",
"fastq/Sample1_S1_L001.samplesheet.csv",
"multiqc",
"multiqc/multiqc_data",
"multiqc/multiqc_data/bclconvert-lane-stats-table.txt",
Expand Down Expand Up @@ -237,6 +245,26 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:31:58.719385372"
"timestamp": "2024-10-30T15:28:38.170386268"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:43.215827644"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:28:53.253703589"
}
}
5 changes: 4 additions & 1 deletion tests/bclconvert_mini.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ nextflow_pipeline {
stable_name,
// All files with stable contents
stable_path
).match() }
).match() },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/atacseq_samplesheet.csv")).match("atacseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/rnaseq_samplesheet.csv")).match("rnaseq_samplesheet") },
{ assert snapshot(UTILS.validateFastqPaths("$outputDir/samplesheet/taxprofiler_samplesheet.csv")).match("taxprofiler_samplesheet") }
)
}
}
Expand Down
41 changes: 32 additions & 9 deletions tests/bclconvert_mini.nf.test.snap
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
{
"rnaseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:33:18.621857802"
},
"BCL-CONVERT-mini": {
"content": [
38,
23,
{
"BCLCONVERT": {
"bclconvert": "4.3.6"
Expand All @@ -20,13 +30,6 @@
}
},
[
"fastq",
"fastq/HBRR1_S1_L001.samplesheet.csv",
"fastq/HBRR2_S2_L001.samplesheet.csv",
"fastq/HBRR3_S3_L001.samplesheet.csv",
"fastq/UHRR1_S4_L001.samplesheet.csv",
"fastq/UHRR2_S5_L001.samplesheet.csv",
"fastq/UHRR3_S6_L001.samplesheet.csv",
"miniseq_truseq_smrna",
"miniseq_truseq_smrna/HBRR1_S1_L001.fastp.fastq.gz",
"miniseq_truseq_smrna/HBRR1_S1_L001.fastp.fastq.gz.md5",
Expand Down Expand Up @@ -350,6 +353,26 @@
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-17T23:34:50.670465424"
"timestamp": "2024-10-30T15:30:52.179362614"
},
"atacseq_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:32:05.328537478"
},
"taxprofiler_samplesheet": {
"content": [
true
],
"meta": {
"nf-test": "0.9.0",
"nextflow": "24.04.4"
},
"timestamp": "2024-10-30T15:34:31.69965379"
}
}
Loading

0 comments on commit c53abee

Please sign in to comment.