From d86b934e3f1cd58a2af879cbdc1caa264cbb83c2 Mon Sep 17 00:00:00 2001
From: glichtenstein
Date: Mon, 22 Jan 2024 14:33:48 -0500
Subject: [PATCH] dropped file logging to file. I need help for that.

---
Review notes (text between the "---" marker and the diff is not part of the
commit message):

* Removes the script-level `ch_invalid_fastq` channel and the
  `collectFile(name: 'invalid_fastqs.txt', ...)` logging chain from
  BCL_DEMULTIPLEX; the workflow now emits the output of
  generate_fastq_meta() directly as `fastq`.
* generate_fastq_meta() no longer pushes rejected files into a channel; it
  prints "Skipping invalid or empty FASTQ file: ..." and sets `fastq = null`.
* readgroup_from_fastq() declares `line` locally, reads the first line of the
  gzipped FASTQ, and builds the read group from the colon-separated header:
  ID = <field 2>.<field 3>, PU = ID plus the last field when it matches
  /[GATC+-]/, PL = "ILLUMINA".
* Every helper variable in readgroup_from_fastq() is declared with `def`.
  The function is called from inside a `map` closure, and undeclared
  assignments in a Nextflow script function are script-global, so they could
  be overwritten by a concurrent invocation.
* The header assertion is null-safe and carries a message, so an empty stream
  fails with the offending path instead of a bare NullPointerException.

 subworkflows/nf-core/bcl_demultiplex/main.nf | 54 +++++++++-----------
 1 file changed, 23 insertions(+), 31 deletions(-)

diff --git a/subworkflows/nf-core/bcl_demultiplex/main.nf b/subworkflows/nf-core/bcl_demultiplex/main.nf
index 64e8e90e..6fb108fe 100644
--- a/subworkflows/nf-core/bcl_demultiplex/main.nf
+++ b/subworkflows/nf-core/bcl_demultiplex/main.nf
@@ -7,9 +7,6 @@
 include { BCLCONVERT } from "../../../modules/nf-core/bclconvert/main"
 include { BCL2FASTQ  } from "../../../modules/nf-core/bcl2fastq/main"
 
-// invalid fastq logging
-ch_invalid_fastq = Channel.empty()
-
 workflow BCL_DEMULTIPLEX {
     take:
         ch_flowcell // [[id:"", lane:""],samplesheet.csv, path/to/bcl/files]
@@ -64,25 +61,11 @@ workflow BCL_DEMULTIPLEX {
             ch_versions = ch_versions.mix(BCL2FASTQ.out.versions)
         }
 
-        // Split the channel into valid and invalid FASTQ entries
-        ch_fastq_meta = generate_fastq_meta(ch_fastq)
-
-        ch_invalid_fastq = ch_fastq_meta
-            .filter { it[0] == null }
-            .map { it[1] } // Extracting the file path
-            .view { println("Invalid FASTQ Detected: $it") }
-            .collectFile(name: 'invalid_fastqs.txt', newLine: true)
-            .view { file ->
-                println("Invalid FASTQs collected in file: ${file.absolutePath}")
-                println("Contents of the file:")
-                file.eachLine { println(it) }
-            }
-
-        ch_valid_fastq = ch_fastq_meta
-            .filter { it[0] != null }
+        // Generate meta for each fastq
+        ch_fastq_with_meta = generate_fastq_meta(ch_fastq)
 
     emit:
-        fastq    = ch_valid_fastq
+        fastq    = ch_fastq_with_meta
         reports  = ch_reports
         stats    = ch_stats
         interop  = ch_interop
@@ -93,6 +76,7 @@ workflow BCL_DEMULTIPLEX {
     FUNCTIONS
 */
 
+// Add meta values to fastq channel and skip invalid FASTQ files
 def generate_fastq_meta(ch_reads) {
     ch_reads.transpose().map { fc_meta, fastq ->
         // Check if the FASTQ file is empty or has invalid content
@@ -115,9 +99,7 @@ def generate_fastq_meta(ch_reads) {
             meta.readgroup = readgroup_from_fastq(fastq)
             meta.readgroup.SM = meta.samplename
         } else {
-            println("Invalid FASTQ file detected: ${fastq}")
-            ch_invalid_fastq << fastq
-            ch_invalid_fastq.view()
+            println("Skipping invalid or empty FASTQ file: ${fastq}")
             fastq = null
         }
 
@@ -133,25 +115,35 @@ def generate_fastq_meta(ch_reads) {
     }
 }
 
+// https://github.com/nf-core/sarek/blob/7ba61bde8e4f3b1932118993c766ed33b5da465e/workflows/sarek.nf#L1014-L1040
 def readgroup_from_fastq(path) {
+    // expected format:
+    // xx:yy:FLOWCELLID:LANE:... (seven fields)
+    def line
+
     path.withInputStream {
         InputStream gzipStream = new java.util.zip.GZIPInputStream(it)
         Reader decoder = new InputStreamReader(gzipStream, 'ASCII')
         BufferedReader buffered = new BufferedReader(decoder)
         line = buffered.readLine()
-        if (line == null) {
-            println("Warning: Empty or invalid FASTQ file: ${path}")
-            //ch_invalid_fastq << path
-            return null
-        }
-        assert line.startsWith('@') : "FASTQ file does not start with '@': ${path}"
     }
+    assert line?.startsWith('@') : "FASTQ file is empty or does not start with '@': ${path}"
     line = line.substring(1)
     def fields = line.split(':')
     def rg = [:]
 
-    rg.ID = [fields[2],fields[3]].join(".")
-    rg.PU = [fields[2], fields[3], fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""].findAll().join(".")
+
+    // CASAVA 1.8+ format, from https://support.illumina.com/help/BaseSpace_OLH_009008/Content/Source/Informatics/BS/FileFormat_FASTQ-files_swBS.htm
+    // "@<instrument>:<run number>:<flowcell ID>:<lane>:<tile>:<x-pos>:<y-pos>:<UMI> <read>:<is filtered>:<control number>:<index>"
+    def sequencer_serial = fields[0]
+    def run_number = fields[1]
+    def fcid = fields[2]
+    def lane = fields[3]
+    def index = fields[-1] =~ /[GATC+-]/ ? fields[-1] : ""
+
+    rg.ID = [fcid,lane].join(".")
+    rg.PU = [fcid, lane, index].findAll().join(".")
     rg.PL = "ILLUMINA"
+
     return rg
 }