diff --git a/conf/test.config b/conf/test.config index a4e5f05..5bcf4c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -15,8 +15,8 @@ params { config_profile_description = 'Minimal test dataset to check pipeline function' // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' + max_cpus = 4 + max_memory = '16.GB' max_time = '6.h' // Input data @@ -25,6 +25,9 @@ params { run_kneaddata = true kneaddata_db_version = 'human_genome' run_metaphlan = true - metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + metaphlan_db_version = 'mpa_vOct22_CHOCOPhlAnSGB_202212' + run_humann = true + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' } diff --git a/modules/local/humann/humann/environment.yml b/modules/local/humann/humann/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/humann/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/humann/main.nf b/modules/local/humann/humann/main.nf new file mode 100644 index 0000000..5ba7c21 --- /dev/null +++ b/modules/local/humann/humann/main.nf @@ -0,0 +1,58 @@ +process HUMANN_HUMANN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(metaphlan_profile) + path chocophlan_db + path uniref_db + + output: + tuple val(meta), path("*_genefamilies.tsv") , emit: genefamilies + tuple val(meta), path("*_pathabundance.tsv"), emit: pathabundance + tuple val(meta), path("*_pathcoverage.tsv") , emit: pathcoverage + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input_data = !meta.single_end ? "--input ${reads[0]} --input ${reads[1]}" : "--input $reads" + """ + humann \\ + ${input_data} \\ + --output ./ \\ + --threads ${task.cpus} \\ + --taxonomic-profile ${metaphlan_profile} \\ + --nucleotide-database ${chocophlan_db} \\ + --protein-database ${uniref_db} \\ + --o-log ${prefix}.log \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_genefamilies.tsv + touch ${prefix}_pathabundance.tsv + touch ${prefix}_pathcoverage.tsv + touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/humann/nextflow.config b/modules/local/humann/humann/nextflow.config new file mode 100644 index 0000000..52ad9a3 --- /dev/null +++ b/modules/local/humann/humann/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: HUMANN_HUMANN { + publishDir = [ + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_genefamilies.tsv' + ], + [ + path: { 
"${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathabundance.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathcoverage.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + ] + ext.args = params.humann_options ? params.humann_options : "" + } +} diff --git a/modules/local/humann/humann/tests/main.nf.test b/modules/local/humann/humann/tests/main.nf.test new file mode 100644 index 0000000..cae1a75 --- /dev/null +++ b/modules/local/humann/humann/tests/main.nf.test @@ -0,0 +1,115 @@ +nextflow_process { + + name "Test Process HUMANN_HUMANN" + script "../main.nf" + process "HUMANN_HUMANN" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "humann" + tag "humann/humann" + + setup { + run("HUMANN_DOWNLOADCHOCOPHLANDB") { + script "../../downloadchocophlandb/main.nf" + process { + """ + input[0] = 'TODO - some chocophlan db version - or maybe skip this setup and build in a mini test db' + """ + } + } + + run("HUMANN_DOWNLOADUNIREFDB") { + script "../../downloadunirefdb/main.nf" + process { + """ + input[0] = 'TODO - some uniref db version - or maybe skip this setup and build in a mini test db' + """ + } + } + } + + test("kneaded fastq.gz") { + + when { + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true) + ] + ] + ) + input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db + input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db + """ + } + } + + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + 
process.out.genefamilies + process.out.pathabundance + process.out.pathcoverage, + process.out.versions + ).match() + }, + { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") } + ) + } + } + + test("kneaded fastq.gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = Channel.of( + [ + [id: 'test'], + [ + file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true) + ] + ] + ) + input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db + input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db + """ + } + } + + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/local/humann/humann/tests/nextflow.config b/modules/local/humann/humann/tests/nextflow.config new file mode 100644 index 0000000..52ad9a3 --- /dev/null +++ b/modules/local/humann/humann/tests/nextflow.config @@ -0,0 +1,27 @@ +process { + withName: HUMANN_HUMANN { + publishDir = [ + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_genefamilies.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathabundance.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*_pathcoverage.tsv' + ], + [ + path: { "${params.outdir}/humann/humann" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ], + ] + ext.args = params.humann_options ? 
params.humann_options : "" + } +} diff --git a/modules/local/humann/join/environment.yml b/modules/local/humann/join/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/join/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/join/main.nf b/modules/local/humann/join/main.nf new file mode 100644 index 0000000..17f6027 --- /dev/null +++ b/modules/local/humann/join/main.nf @@ -0,0 +1,39 @@ +process HUMANN_JOIN { + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + tuple val(meta), path(input) + val file_name_pattern + + output: + path("*_joined.tsv") , emit: joined + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + humann_join_tables \\ + --input . 
\\
+        --output ${file_name_pattern}_joined.tsv \\
+        --file_name $file_name_pattern \\
+        ${args}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${file_name_pattern}_joined.tsv
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/humann/join/nextflow.config b/modules/local/humann/join/nextflow.config
new file mode 100644
index 0000000..2c90039
--- /dev/null
+++ b/modules/local/humann/join/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: HUMANN_JOIN {
+        publishDir = [
+            [
+                path: { "${params.outdir}/humann/results" },
+                mode: params.publish_dir_mode,
+                pattern: '*_joined.tsv'
+            ]
+        ]
+        ext.args = params.humann_options ? params.humann_options : ""
+    }
+}
diff --git a/modules/local/humann/join/tests/main.nf.test b/modules/local/humann/join/tests/main.nf.test
new file mode 100644
index 0000000..474626f
--- /dev/null
+++ b/modules/local/humann/join/tests/main.nf.test
@@ -0,0 +1,54 @@
+nextflow_process {
+
+    name "Test Process HUMANN_JOIN"
+    script "../main.nf"
+    process "HUMANN_JOIN"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "humann"
+    tag "humann/join"
+
+    test("humann/join") {
+        when {
+            process {
+                """
+                input[0] = Channel.of([ [id:'test'], [ file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true) ] ]); input[1] = "genefamilies"
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.joined +
+                    process.out.versions
+                    ).match()
+                },
+                { assert process.out.versions }
+            )
+        }
+    }
+
+    test("humann/join - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([ [id:'test'], [] ]); input[1] = "genefamilies"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff
--git a/modules/local/humann/join/tests/nextflow.config b/modules/local/humann/join/tests/nextflow.config new file mode 100644 index 0000000..2c90039 --- /dev/null +++ b/modules/local/humann/join/tests/nextflow.config @@ -0,0 +1,12 @@ +process { + withName: HUMANN_JOIN { + publishDir = [ + [ + path: { "${params.outdir}/humann/results" }, + mode: params.publish_dir_mode, + pattern: '*_joined.tsv.gz' + ] + ] + ext.args = params.humann_options ? params.humann_options : "" + } +} diff --git a/modules/local/humann/regroup/environment.yml b/modules/local/humann/regroup/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/regroup/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/regroup/main.nf b/modules/local/humann/regroup/main.nf new file mode 100644 index 0000000..f2fe5dc --- /dev/null +++ b/modules/local/humann/regroup/main.nf @@ -0,0 +1,42 @@ +process HUMANN_REGROUP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + tuple val(meta), path(input) + val groups + + output: + tuple val(meta), path("*_regroup.tsv") , emit: regroup + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + humann_regroup_table \\ + --input $input \\ + --output ${prefix}_${groups}_regroup.tsv \\ + --groups $groups \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_${groups}_regroup.tsv + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/regroup/nextflow.config b/modules/local/humann/regroup/nextflow.config new file mode 100644 index 0000000..ff58126 --- /dev/null +++ b/modules/local/humann/regroup/nextflow.config @@ -0,0 +1,12 @@ +process { + withName: HUMANN_REGROUP { + publishDir = [ + [ + path: { "${params.outdir}/humann/regroup" }, + mode: params.publish_dir_mode, + pattern: '*_regroup.tsv.gz' + ] + ] + ext.args = params.humann_options ? 
params.humann_options : ""
+    }
+}
diff --git a/modules/local/humann/regroup/tests/main.nf.test b/modules/local/humann/regroup/tests/main.nf.test
new file mode 100644
index 0000000..9dc218c
--- /dev/null
+++ b/modules/local/humann/regroup/tests/main.nf.test
@@ -0,0 +1,70 @@
+nextflow_process {
+
+    name "Test Process HUMANN_REGROUP"
+    script "../main.nf"
+    process "HUMANN_REGROUP"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "humann"
+    tag "humann/regroup"
+
+    test("humann/regroup") {
+        when {
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [id: 'test'],
+                        [
+                            file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true)
+                        ]
+                    ]
+                )
+                input[1] = "uniref90_rxn"
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.regroup +
+                    process.out.versions
+                    ).match()
+                },
+                { assert process.out.versions }
+            )
+        }
+    }
+
+    test("humann/regroup - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [id: 'test'],
+                        [
+                            file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true)
+                        ]
+                    ]
+                )
+                input[1] = "uniref90_rxn"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/local/humann/regroup/tests/nextflow.config b/modules/local/humann/regroup/tests/nextflow.config
new file mode 100644
index 0000000..ff58126
--- /dev/null
+++ b/modules/local/humann/regroup/tests/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: HUMANN_REGROUP {
+        publishDir = [
+            [
+                path: { "${params.outdir}/humann/regroup" },
+                mode: params.publish_dir_mode,
+                pattern: '*_regroup.tsv'
+            ]
+        ]
+        ext.args = params.humann_options ? 
params.humann_options : ""
+    }
+}
diff --git a/modules/local/humann/rename/environment.yml b/modules/local/humann/rename/environment.yml
new file mode 100644
index 0000000..92f963f
--- /dev/null
+++ b/modules/local/humann/rename/environment.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bioconda::humann=3.8
diff --git a/modules/local/humann/rename/main.nf b/modules/local/humann/rename/main.nf
new file mode 100644
index 0000000..f3c320f
--- /dev/null
+++ b/modules/local/humann/rename/main.nf
@@ -0,0 +1,42 @@
+process HUMANN_RENAME {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0':
+        'biocontainers/humann:3.8--pyh7cba7a3_0' }"
+
+    input:
+    tuple val(meta), path(input)
+    val names
+
+    output:
+    tuple val(meta), path("*_renamed.tsv") , emit: renamed
+    path "versions.yml" , emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    humann_rename_table \\
+        --input $input \\
+        --output ${prefix}_${names}_renamed.tsv \\
+        --names $names \\
+        ${args}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}_${names}_renamed.tsv
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/humann/rename/nextflow.config b/modules/local/humann/rename/nextflow.config
new file mode 100644
index 0000000..a990469
--- /dev/null
+++ b/modules/local/humann/rename/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: HUMANN_RENAME {
+        publishDir = [
+            [
+                path: { 
"${params.outdir}/humann/rename" },
+                mode: params.publish_dir_mode,
+                pattern: '*_renamed.tsv'
+            ]
+        ]
+        ext.args = params.humann_options ? params.humann_options : ""
+    }
+}
diff --git a/modules/local/humann/rename/tests/main.nf.test b/modules/local/humann/rename/tests/main.nf.test
new file mode 100644
index 0000000..edee5b6
--- /dev/null
+++ b/modules/local/humann/rename/tests/main.nf.test
@@ -0,0 +1,70 @@
+nextflow_process {
+
+    name "Test Process HUMANN_RENAME"
+    script "../main.nf"
+    process "HUMANN_RENAME"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "humann"
+    tag "humann/rename"
+
+    test("humann/rename") {
+        when {
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [id: 'test'],
+                        [
+                            file(params.modules_testdata_base_path + "TODO_some_rxn.tsv", checkIfExists: true)
+                        ]
+                    ]
+                )
+                input[1] = "metacyc-rxn"
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.renamed +
+                    process.out.versions
+                    ).match()
+                },
+                { assert process.out.versions }
+            )
+        }
+    }
+
+    test("humann/rename - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [id: 'test'],
+                        [
+                            file(params.modules_testdata_base_path + "TODO_some_rxn.tsv", checkIfExists: true)
+                        ]
+                    ]
+                )
+                input[1] = "metacyc-rxn"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/local/humann/rename/tests/nextflow.config b/modules/local/humann/rename/tests/nextflow.config
new file mode 100644
index 0000000..a990469
--- /dev/null
+++ b/modules/local/humann/rename/tests/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: HUMANN_RENAME {
+        publishDir = [
+            [
+                path: { "${params.outdir}/humann/rename" },
+                mode: params.publish_dir_mode,
+                pattern: '*_renamed.tsv'
+            ]
+        ]
+        ext.args = params.humann_options ? 
params.humann_options : "" + } +} diff --git a/modules/local/humann/renorm/environment.yml b/modules/local/humann/renorm/environment.yml new file mode 100644 index 0000000..92f963f --- /dev/null +++ b/modules/local/humann/renorm/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::humann=3.8 diff --git a/modules/local/humann/renorm/main.nf b/modules/local/humann/renorm/main.nf new file mode 100644 index 0000000..b5a4c5f --- /dev/null +++ b/modules/local/humann/renorm/main.nf @@ -0,0 +1,43 @@ +process HUMANN_RENORM { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0': + 'biocontainers/humann:3.8--pyh7cba7a3_0' }" + + input: + tuple val(meta), path(input) + val units + + output: + tuple val(meta), path("*_renorm.tsv") , emit: renorm + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + humann_renorm_table \\ + --input $input \\ + --output ${prefix}_${units}_renorm.tsv \\ + --units $units \\ + --update-snames \\ + ${args} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_${units}_renorm.tsv + cat <<-END_VERSIONS > versions.yml + "${task.process}": + humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' )) + END_VERSIONS + """ +} diff --git a/modules/local/humann/renorm/nextflow.config b/modules/local/humann/renorm/nextflow.config new file mode 100644 index 0000000..9298e72 --- /dev/null +++ b/modules/local/humann/renorm/nextflow.config @@ -0,0 +1,12 @@ +process { + withName: HUMANN_RENORM { + publishDir = 
[
+            [
+                path: { "${params.outdir}/humann/renorm" },
+                mode: params.publish_dir_mode,
+                pattern: '*_renorm.tsv'
+            ]
+        ]
+        ext.args = params.humann_options ? params.humann_options : ""
+    }
+}
diff --git a/modules/local/humann/renorm/tests/main.nf.test b/modules/local/humann/renorm/tests/main.nf.test
new file mode 100644
index 0000000..2888606
--- /dev/null
+++ b/modules/local/humann/renorm/tests/main.nf.test
@@ -0,0 +1,70 @@
+nextflow_process {
+
+    name "Test Process HUMANN_RENORM"
+    script "../main.nf"
+    process "HUMANN_RENORM"
+    config "./nextflow.config"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "humann"
+    tag "humann/renorm"
+
+    test("humann/renorm") {
+        when {
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [id: 'test'],
+                        [
+                            file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true)
+                        ]
+                    ]
+                )
+                input[1] = "cpm"
+                """
+            }
+        }
+
+        then {
+            assertAll (
+                { assert process.success },
+                { assert snapshot(
+                    process.out.renorm +
+                    process.out.versions
+                    ).match()
+                },
+                { assert process.out.versions }
+            )
+        }
+    }
+
+    test("humann/renorm - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of(
+                    [
+                        [id: 'test'],
+                        [
+                            file(params.modules_testdata_base_path + "TODO_some_genefamilies.tsv", checkIfExists: true)
+                        ]
+                    ]
+                )
+                input[1] = "cpm"
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+    }
+}
diff --git a/modules/local/humann/renorm/tests/nextflow.config b/modules/local/humann/renorm/tests/nextflow.config
new file mode 100644
index 0000000..9298e72
--- /dev/null
+++ b/modules/local/humann/renorm/tests/nextflow.config
@@ -0,0 +1,12 @@
+process {
+    withName: HUMANN_RENORM {
+        publishDir = [
+            [
+                path: { "${params.outdir}/humann/renorm" },
+                mode: params.publish_dir_mode,
+                pattern: '*_renorm.tsv'
+            ]
+        ]
+        ext.args = params.humann_options 
? params.humann_options : "" + } +} diff --git a/nextflow.config b/nextflow.config index 0c0b78d..1128606 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,10 +31,12 @@ params { metaphlan_sgb2gtbd_file = "https://github.com/biobakery/MetaPhlAn/raw/master/metaphlan/utils/mpa_vOct22_CHOCOPhlAnSGB_202212_SGB2GTDB.tsv" // HUMAnN options - run_humann = false - chocophlan_db_version ='DEMO' - uniref_db_version ='DEMO_diamond' - utility_mapping_version ='DEMO' + run_humann = false + chocophlan_db = null + chocophlan_db_version = 'DEMO' + uniref_db = null + uniref_db_version = 'DEMO_diamond' + utility_mapping_version = 'DEMO' // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 3c1e17c..c6e2503 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -124,11 +124,19 @@ "type": "boolean", "description": "Run HUMAnN to assess functional capacity of a metagenome?" }, + "chocophlan_db": { + "type": "string", + "description": "Path to pre-downloaded ChocoPhlAn database" + }, "chocophlan_db_version": { "type": "string", "default": "DEMO", "description": "ChocoPhlAn database version to download" }, + "uniref_db": { + "type": "string", + "description": "Path to pre-downloaded UNIREF database" + }, "uniref_db_version": { "type": "string", "default": "DEMO_diamond", diff --git a/subworkflows/local/fastq_microbial_pathway_humann/main.nf b/subworkflows/local/fastq_microbial_pathway_humann/main.nf new file mode 100644 index 0000000..8b82438 --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/main.nf @@ -0,0 +1,110 @@ +// +// SUBWORKFLOW: Identify gene families and pathways associated with reads using HUMAnN 3 +// + +include { HUMANN_DOWNLOADCHOCOPHLANDB } from '../../../modules/local/humann/downloadchocophlandb/main' +include { HUMANN_DOWNLOADUNIREFDB } from '../../../modules/local/humann/downloadunirefdb/main' +include { HUMANN_HUMANN } from '../../../modules/local/humann/humann/main' +include { 
HUMANN_JOIN as JOIN_GENES } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_PATHABUND } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_PATHCOV } from '../../../modules/local/humann/join/main' +include { HUMANN_JOIN as JOIN_EC } from '../../../modules/local/humann/join/main' +include { HUMANN_REGROUP } from '../../../modules/local/humann/regroup/main' +include { HUMANN_RENAME } from '../../../modules/local/humann/rename/main' +include { HUMANN_RENORM } from '../../../modules/local/humann/renorm/main' + +workflow FASTQ_MICROBIAL_PATHWAY_HUMANN { + + take: + processed_reads_fastq_gz // channel: [ val(meta), [ processed_reads_1.fastq.gz, processed_reads_2.fastq.gz ] ] (MANDATORY) + metaphlan_profile // channel: [ val(meta2), metaphlan_profile.tsv ] (MANDATORY) + chocophlan_db // channel: [ chocophlan_db ] (OPTIONAL) + chocophlan_db_version // value: '' (OPTIONAL) + uniref_db // channel: [ uniref_db ] (OPTIONAL) + uniref_db_version // value: '' (OPTIONAL) + + main: + + ch_versions = Channel.empty() + + // if chocophlan_db exists, skip HUMANN_DOWNLOADCHOCOPHLANDB + if ( chocophlan_db ){ + ch_chocophlan_db = chocophlan_db + } else { + // + // MODULE: Download ChocoPhlAn database + // + ch_chocophlan_db = HUMANN_DOWNLOADCHOCOPHLANDB ( chocophlan_db_version ).chocophlan_db + ch_versions = ch_versions.mix(HUMANN_DOWNLOADCHOCOPHLANDB.out.versions) + } + + // if uniref_db exists, skip HUMANN_DOWNLOADUNIREFDB + if ( uniref_db ){ + ch_uniref_db = uniref_db + } else { + // + // MODULE: Download UniRef database + // + ch_uniref_db = HUMANN_DOWNLOADUNIREFDB ( uniref_db_version ).uniref_db + ch_versions = ch_versions.mix(HUMANN_DOWNLOADUNIREFDB.out.versions) + } + + // + // MODULE: Run HUMAnN 3 for raw outputs + // + ch_humann_genefamilies_raw = HUMANN_HUMANN ( processed_reads_fastq_gz, metaphlan_profile, ch_chocophlan_db, ch_uniref_db ).genefamilies + ch_humann_pathabundance_raw = HUMANN_HUMANN.out.pathabundance + 
ch_humann_pathcoverage_raw = HUMANN_HUMANN.out.pathcoverage // TODO is this still right? looking at humann docs, might not get this file any longer? + ch_humann_logs = HUMANN_HUMANN.out.log + ch_versions = ch_versions.mix(HUMANN_HUMANN.out.versions) + + // collect log files and store in a directory + ch_combined_humann_logs = ch_humann_logs + .map { [ [ id:'all_samples' ], it[1] ] } + .groupTuple( sort: 'deep' ) + + // + // MODULE: renormalize raw gene families from HUMAnN outputs to cpm + // + ch_humann_genefamilies_cpm = HUMANN_RENORM ( ch_humann_genefamilies_raw, 'cpm' ).renorm + ch_versions = ch_versions.mix(HUMANN_RENORM.out.versions) + + // + // MODULE: regroup cpm gene families to EC numbers + // + ch_humann_ec = HUMANN_REGROUP(ch_humann_genefamilies_cpm, 'uniref90_rxn').regroup + ch_versions = ch_versions.mix(HUMANN_REGROUP.out.versions) + + // + // MODULE: rename ec number outputs to include descriptors + // + ch_humann_ec_renamed = HUMANN_RENAME(ch_humann_ec, 'ec').renamed // TODO make sure 'ec' is valid arg + ch_versions = ch_versions.mix(HUMANN_RENAME.out.versions) + + // + // MODULE: join gene abundances across all samples into one file + // + ch_humann_genefamilies_joined = JOIN_GENES(ch_humann_genefamilies_cpm, 'cpm').joined + + // + // MODULE: join ec abundances across all samples into one file + // + ch_humann_ec_joined = JOIN_EC(ch_humann_ec_renamed, 'ec').joined // TODO check the file name pattern + + // + // MODULE: join pathway abundances across all samples into one file + // + ch_humann_pathabundance_joined = JOIN_PATHABUND(ch_humann_pathabundance_raw, 'pathabundance').joined + + // + // MODULE: join pathway coverage across all samples into one file + // + ch_humann_pathcoverage_joined = JOIN_PATHCOV(ch_humann_pathcoverage_raw, 'pathcoverage').joined + + emit: + humann_genefamilies = ch_humann_genefamilies_joined // channel: [ val(meta), genefamilies.tsv ] + humann_ec = ch_humann_ec_joined // channel: [ val(meta), read_counts.tsv ] + 
humann_pathabundance = ch_humann_pathabundance_joined // channel: [ val(meta), pathabundance.tsv ] + humann_pathcoverage = ch_humann_pathcoverage_joined // channel: [ val(meta), pathcoverage.tsv ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config b/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config index 1a8763f..dccc682 100644 --- a/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config +++ b/subworkflows/local/fastq_microbial_pathway_humann/nextflow.config @@ -1,2 +1,7 @@ includeConfig '../../../modules/local/humann/downloadchocophlandb/nextflow.config' includeConfig '../../../modules/local/humann/downloadunirefdb/nextflow.config' +includeConfig '../../../modules/local/humann/humann/nextflow.config' +includeConfig '../../../modules/local/humann/join/nextflow.config' +includeConfig '../../../modules/local/humann/regroup/nextflow.config' +includeConfig '../../../modules/local/humann/rename/nextflow.config' +includeConfig '../../../modules/local/humann/renorm/nextflow.config' diff --git a/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test b/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test new file mode 100644 index 0000000..fb8d06d --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_workflow { + + name "Test Subworkflow: FASTQ_MICROBIAL_PATHWAY_HUMANN" + script "../main.nf" + workflow "FASTQ_MICROBIAL_PATHWAY_HUMANN" + + tag "subworkflows" + tag "subworkflows_local" + tag "fastq_microbial_pathway_humann" + tag "fastq_microbial_pathway_humann_default" + + + // TODO update inputs here, these are copied from the metaphlan subworkflow which obviously isnt what we actually need here + // the first should be processed fastq + // the second should be metaphlan profile output from that subworkflow + // the third is chocophlan db, either that or a chocophlan db 
version as fourth input should be provided (multiple tests) + // the fifth is uniref db, either that or a uniref db version as sixth input should be provided (multiple tests) + + test("fastq.gz") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id: 'test' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true) + ] + ], + [ + [ id: 'test2' ], + [ + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_1.fastq.gz", checkIfExists: true), + file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test2_2.fastq.gz", checkIfExists: true) + ] + ] + ) + input[1] = "https://github.com/biobakery/MetaPhlAn/raw/master/metaphlan/utils/mpa_vOct22_CHOCOPhlAnSGB_202212_SGB2GTDB.tsv" + input[2] = null + input[3] = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml b/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml new file mode 100644 index 0000000..1467ae5 --- /dev/null +++ b/subworkflows/local/fastq_microbial_pathway_humann/tests/tags.yml @@ -0,0 +1,2 @@ +fastq_microbial_pathway_humann_default: + - subworkflows/local/fastq_microbial_pathway_humann/** diff --git a/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf b/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf index d7b18b0..4b1aac7 100644 --- a/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf +++ b/subworkflows/local/fastq_read_preprocessing_kneaddata/main.nf @@ -31,7 +31,7 @@ workflow FASTQ_READ_PREPROCESSING_KNEADDATA { // // MODULE: Trim and remove human reads // - ch_preprocessed_reads_fastq_gz = KNEADDATA_KNEADDATA ( raw_reads_fastq_gz, 
ch_kneaddata_db.first() ).preprocessed_reads + ch_preprocessed_reads_fastq_gz = KNEADDATA_KNEADDATA ( raw_reads_fastq_gz, ch_kneaddata_db ).preprocessed_reads ch_kneaddata_logs = KNEADDATA_KNEADDATA.out.kneaddata_log ch_versions = ch_versions.mix(KNEADDATA_KNEADDATA.out.versions) diff --git a/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf b/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf index a1205f0..1ca8ab5 100644 --- a/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf +++ b/subworkflows/local/fastq_read_taxonomy_metaphlan/main.nf @@ -33,7 +33,7 @@ workflow FASTQ_READ_TAXONOMY_METAPHLAN { // // MODULE: Trim and remove human reads // - ch_metaphlan_profile_txt = METAPHLAN_METAPHLAN ( preprocessed_reads_fastq_gz, ch_metaphlan_db.first() ).profile + ch_metaphlan_profile_txt = METAPHLAN_METAPHLAN ( preprocessed_reads_fastq_gz, ch_metaphlan_db ).profile ch_versions = ch_versions.mix(METAPHLAN_METAPHLAN.out.versions) // diff --git a/tests/main.nf.test b/tests/main.nf.test index 4e471eb..6583270 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -14,6 +14,9 @@ nextflow_pipeline { kneaddata_db_version = 'human_genome' run_metaphlan = true metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + run_humann = true + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' } } diff --git a/workflows/biobakerymgx/main.nf b/workflows/biobakerymgx/main.nf index d045911..6368515 100644 --- a/workflows/biobakerymgx/main.nf +++ b/workflows/biobakerymgx/main.nf @@ -9,6 +9,7 @@ // include { FASTQ_READ_PREPROCESSING_KNEADDATA } from '../../subworkflows/local/fastq_read_preprocessing_kneaddata/main' include { FASTQ_READ_TAXONOMY_METAPHLAN } from '../../subworkflows/local/fastq_read_taxonomy_metaphlan/main' +include { FASTQ_MICROBIAL_PATHWAY_HUMANN } from '../../subworkflows/local/fastq_microbial_pathway_humann/main' /* @@ -130,7 +131,7 @@ workflow BIOBAKERYMGX { Taxonomic classification: MetaPhlAn 
-----------------------------------------------------------------------------------*/ if ( params.run_metaphlan ) { - // create channel from params.kneaddata_db + // create channel from params.metaphlan_db if ( !params.metaphlan_db ){ ch_metaphlan_db = null } else { @@ -146,13 +147,63 @@ workflow BIOBAKERYMGX { // // SUBWORKFLOW: MetaPhlAn // - ch_read_taxonomy_tsv = FASTQ_READ_TAXONOMY_METAPHLAN ( ch_preprocessed_fastq_gz, ch_metaphlan_sgb2gtbd_file, ch_metaphlan_db, params.metaphlan_db_version ).metaphlan_profiles_merged_tsv + ch_read_taxonomy_tsv = FASTQ_READ_TAXONOMY_METAPHLAN ( + ch_preprocessed_fastq_gz, + ch_metaphlan_sgb2gtbd_file, + ch_metaphlan_db, + params.metaphlan_db_version + ).metaphlan_profiles_merged_tsv ch_versions = ch_versions.mix(FASTQ_READ_TAXONOMY_METAPHLAN.out.versions) } else { ch_read_taxonomy_tsv = Channel.empty() } + /*----------------------------------------------------------------------------------- + Functional classification: HUMAnN + -----------------------------------------------------------------------------------*/ + if ( params.run_humann ) { + // create channel from params.chocophlan_db + if ( !params.chocophlan_db ) { + ch_chocophlan_db = null + } else { + ch_chocophlan_db = Channel.value( file( params.chocophlan_db, checkIfExists: true ) ) + } + + // create channel from params.uniref_db + if ( !params.uniref_db ) { + ch_uniref_db = null + } else { + ch_uniref_db = Channel.value( file( params.uniref_db, checkIfExists: true ) ) + } + + // theres probably a better way to handle this. but good enough for me for now.. + if ( !params.run_metaphlan ) { + error "Error: run_humann is true but run_metaphlan is false. Cannot run HUMAnN without MetaPhlAn." + } + + // + // SUBWORKFLOW: HUMAnN + // + // TODO double check the metaphlan output channel. 
not sure its the format i was expecting in the module + ch_genefamilies_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN( + ch_preprocessed_fastq_gz, + ch_read_taxonomy_tsv, + ch_chocophlan_db, + params.chocophlan_db_version, + ch_uniref_db, + params.uniref_db_version).humann_genefamilies + ch_ec_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_ec + ch_pathabundance_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_pathabundance + ch_pathcoverage_tsv = FASTQ_MICROBIAL_PATHWAY_HUMANN.out.humann_pathcoverage + ch_versions = ch_versions.mix(FASTQ_MICROBIAL_PATHWAY_HUMANN.out.versions) + } else { + ch_genefamilies_tsv = Channel.empty() + ch_ec_tsv = Channel.empty() + ch_pathabundance_tsv = Channel.empty() + ch_pathcoverage_tsv = Channel.empty() + } + /*----------------------------------------------------------------------------------- Pipeline report utilities @@ -192,6 +243,10 @@ workflow BIOBAKERYMGX { preprocessed_fastq_gz = ch_preprocessed_fastq_gz preprocessed_read_counts_tsv = ch_preprocessed_read_counts_tsv read_taxonomy_tsv = ch_read_taxonomy_tsv + genefamilies_tsv = ch_genefamilies_tsv + ec_tsv = ch_ec_tsv + pathabundance_tsv = ch_pathabundance_tsv + pathcoverage_tsv = ch_pathcoverage_tsv multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html versions = ch_versions } diff --git a/workflows/biobakerymgx/tests/main.nf.test b/workflows/biobakerymgx/tests/main.nf.test index 163ca0b..8faae8e 100644 --- a/workflows/biobakerymgx/tests/main.nf.test +++ b/workflows/biobakerymgx/tests/main.nf.test @@ -33,6 +33,8 @@ nextflow_workflow { outdir = "$outputDir" kneaddata_db_version = 'human_genome' metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103' + chocophlan_db_version = 'DEMO' + uniref_db_version = 'DEMO_diamond' max_cpus = 1 } } @@ -44,6 +46,10 @@ nextflow_workflow { workflow.out.preprocessed_fastq_gz, workflow.out.preprocessed_read_counts_tsv, workflow.out.read_taxonomy_tsv, + workflow.out.genefamilies_tsv, + workflow.out.ec_tsv, + 
workflow.out.pathabundance_tsv, + workflow.out.pathcoverage_tsv, workflow.out.versions ).match() },