Add nf-core module mirtrace/qc: conda environment, process definition, metadata, nf-test and snapshot.
Note: the `index` header is omitted for main.nf and meta.yml because this revision edits their content, so the previously recorded blob ids no longer apply.

diff --git a/modules/nf-core/mirtrace/qc/environment.yml b/modules/nf-core/mirtrace/qc/environment.yml
new file mode 100644
index 00000000000..253b36b8d4e
--- /dev/null
+++ b/modules/nf-core/mirtrace/qc/environment.yml
@@ -0,0 +1,8 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "mirtrace_qc"
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - "bioconda::mirtrace=1.0.1"
diff --git a/modules/nf-core/mirtrace/qc/main.nf b/modules/nf-core/mirtrace/qc/main.nf
new file mode 100644
--- /dev/null
+++ b/modules/nf-core/mirtrace/qc/main.nf
@@ -0,0 +1,68 @@
+// Runs `mirtrace qc` on the staged read files of one input tuple and emits its HTML/JSON/TSV reports plus the collapsed FASTA files.
+process MIRTRACE_QC {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mirtrace:1.0.1--0':
+        'biocontainers/mirtrace:1.0.1--0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    val(mirtrace_species)
+
+    output:
+    tuple val(meta), path ("*.html")                                                 , emit: html
+    tuple val(meta), path ("*.json")                                                 , emit: json
+    tuple val(meta), path ("*.tsv")                                                  , emit: tsv
+    tuple val(meta), path ("qc_passed_reads.all.collapsed/*.{fa,fasta}")             , emit: all_fa
+    tuple val(meta), path ("qc_passed_reads.rnatype_unknown.collapsed/*.{fa,fasta}") , emit: rnatype_unknown_fa
+    path "versions.yml"                                                              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Extra command-line options from task.ext.args are forwarded to `mirtrace qc` below.
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Render each staged read file as a string so they can be appended to the command line.
+    def file_list = reads.collect { it.toString() }
+
+    """
+    mirtrace qc \\
+        --species ${mirtrace_species} \\
+        --write-fasta \\
+        --output-dir . \\
+        --force \\
+        ${args} \\
+        ${file_list.join(' ')}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mirtrace: \$(echo \$(mirtrace -v))
+    END_VERSIONS
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    // Create one empty placeholder per declared output glob; nothing else is written to the task directory.
+    """
+    touch ${prefix}.html
+    touch ${prefix}.json
+    touch ${prefix}.tsv
+
+    mkdir -p qc_passed_reads.all.collapsed
+    mkdir -p qc_passed_reads.rnatype_unknown.collapsed
+
+    touch qc_passed_reads.all.collapsed/${prefix}.fa
+    touch qc_passed_reads.rnatype_unknown.collapsed/${prefix}.fa
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        mirtrace: \$(echo \$(mirtrace -v))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/mirtrace/qc/meta.yml b/modules/nf-core/mirtrace/qc/meta.yml
new file mode 100644
--- /dev/null
+++ b/modules/nf-core/mirtrace/qc/meta.yml
@@ -0,0 +1,66 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "mirtrace_qc"
+description: "A tool for quality control and tracing taxonomic origins of microRNA sequencing data"
+keywords:
+  - microRNA
+  - smrnaseq
+  - QC
+tools:
+  - "mirtrace":
+      description: "miRTrace is a new quality control and taxonomic tracing tool developed specifically for small RNA sequencing data (sRNA-Seq). Each sample is characterized by profiling sequencing quality, read length, sequencing depth and miRNA complexity and also the amounts of miRNAs versus undesirable sequences (derived from tRNAs, rRNAs and sequencing artifacts). In addition to these routine quality control (QC) analyses, miRTrace can accurately and sensitively resolve taxonomic origins of small RNA-Seq data based on the composition of clade-specific miRNAs. This feature can be used to detect cross-clade contaminations in typical lab settings. It can also be applied for more specific applications in forensics, food quality control and clinical diagnosis, for instance tracing the origins of meat products or detecting parasitic microRNAs in host serum."
+      homepage: "https://github.com/friedlanderlab/mirtrace/tree/master"
+      documentation: "https://github.com/friedlanderlab/mirtrace/blob/master/release-bundle-includes/doc/manual/mirtrace_manual.pdf"
+      tool_dev_url: "https://github.com/friedlanderlab/mirtrace/tree/master"
+      doi: "10.1186/s13059-018-1588-9"
+      licence: ["GPL v2"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - reads:
+      type: file
+      description: microRNA sequencing data
+      pattern: "*.{fastq,fastq.gz}"
+  - mirtrace_species:
+      type: string
+      description: Target species in microRNA sequencing data (miRBase encoding, e.g. “hsa” for Homo sapiens)
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - html:
+      type: file
+      description: HTML file
+      pattern: "*.html"
+  - json:
+      type: file
+      description: JSON file
+      pattern: "*.json"
+  - tsv:
+      type: file
+      description: TSV file
+      pattern: "*.tsv"
+  - all_fa:
+      type: file
+      description: QC-passed reads in FASTA file. Identical reads are collapsed. Entries are sorted by abundance.
+      pattern: "qc_passed_reads.all.collapsed/*.{fa,fasta}"
+  - rnatype_unknown_fa:
+      type: file
+      description: Unknown RNA type QC-passed reads in FASTA file. Identical reads are collapsed. Entries are sorted by abundance.
+      pattern: "qc_passed_reads.rnatype_unknown.collapsed/*.{fa,fasta}"
+
+authors:
+  - "@atrigila"
+maintainers:
+  - "@atrigila"
diff --git a/modules/nf-core/mirtrace/qc/tests/main.nf.test b/modules/nf-core/mirtrace/qc/tests/main.nf.test
new file mode 100644
index 00000000000..ea2f99fbf2f
--- /dev/null
+++ b/modules/nf-core/mirtrace/qc/tests/main.nf.test
@@ -0,0 +1,81 @@
+nextflow_process {
+
+    name "Test Process MIRTRACE_QC"
+    script "../main.nf"
+    process "MIRTRACE_QC"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "mirtrace"
+    tag "mirtrace/qc"
+
+    test("human - fastq") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true),
+                    ]
+                ]
+                input[1] = "hsa"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+
+                // Check HTML
+                { assert path(process.out.html.get(0).get(1)).text.contains("This file is part of miRTrace.")} ,
+
+                // Check JSON
+                { assert path(process.out.json.get(0).get(1)).json.results[0].stats.uniqueQCPassedSeqsCount == 912 },
+
+                // Check TSV
+                { assert snapshot(process.out.tsv).match("tsv") },
+
+                // Check FASTA files
+                { assert snapshot(process.out.rnatype_unknown_fa).match("rnatype_unknown_fa") },
+                { assert snapshot(process.out.all_fa).match("all_fa") },
+
+                // Check versions
+                { assert snapshot(process.out.versions).match("versions") }
+
+            )
+        }
+
+    }
+
+    test("human - fastq - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ], // meta map
+                    [
+                        file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+                    ]
+                ]
+                input[1] = "hsa"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/mirtrace/qc/tests/main.nf.test.snap b/modules/nf-core/mirtrace/qc/tests/main.nf.test.snap
new file mode 100644
index 00000000000..7f7f3742e29
--- /dev/null
+++ b/modules/nf-core/mirtrace/qc/tests/main.nf.test.snap
@@ -0,0 +1,189 @@
+{
+    "all_fa": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_rnaseq_1.fasta:md5,0181296141177654088bbc2a96b29560",
+                        "test_rnaseq_2.fasta:md5,302d432f8b5cbf6556e1143679c42847"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-02T13:42:28.226466479"
+    },
+    "versions": {
+        "content": [
+            [
+                "versions.yml:md5,b50529beb497fc9882232140b636d9ce"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-02T14:00:50.712738593"
+    },
+    "tsv": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "mirtrace-stats-contamination_basic.tsv:md5,b668899b3ad2f006e073474122103b7a",
+                        "mirtrace-stats-contamination_detailed.tsv:md5,84437ccb74715e956fab549f0567c5c9",
+                        "mirtrace-stats-length.tsv:md5,37a0d254fdb800a0467c8fba7215d724",
+                        "mirtrace-stats-mirna-complexity.tsv:md5,12cebc277d1d7873e4cb707291a82dd2",
+                        "mirtrace-stats-phred.tsv:md5,118bbdc67433ce6fd38c74c37b0fcd3b",
+                        "mirtrace-stats-qcstatus.tsv:md5,6bf4bf54faca386a74bb92592ae4f8ba",
+                        "mirtrace-stats-rnatype.tsv:md5,10ec4e1ad0837efeecc97913cbab5d0f"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-02T13:42:28.194166262"
+    },
+    "human - fastq - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "2": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "3": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "4": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "5": [
+                    "versions.yml:md5,b50529beb497fc9882232140b636d9ce"
+                ],
+                "all_fa": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "html": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "json": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.json:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "rnatype_unknown_fa": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "tsv": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": false
+                        },
+                        "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,b50529beb497fc9882232140b636d9ce"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-02T13:43:42.209949119"
+    },
+    "rnatype_unknown_fa": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test",
+                        "single_end": false
+                    },
+                    [
+                        "test_rnaseq_1.fasta:md5,0181296141177654088bbc2a96b29560",
+                        "test_rnaseq_2.fasta:md5,302d432f8b5cbf6556e1143679c42847"
+                    ]
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.4"
+        },
+        "timestamp": "2024-09-02T13:42:28.208336131"
+    }
+}
\ No newline at end of file