Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hello Humann #24

Open
wants to merge 25 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
bff5441
add skeleton for humann module- wip
d-callan Sep 16, 2024
1882f62
fleshed out humann modules, tests still wip
d-callan Sep 17, 2024
b520116
add draft humann subworkflow
d-callan Sep 17, 2024
ce04afe
fleshing out humann subworkflow some
d-callan Sep 18, 2024
06fa7df
draft adding humann subworkflow to workflow
d-callan Sep 18, 2024
1de7d40
update expected outputs in workflow test
d-callan Sep 18, 2024
69007fa
dont keep compressing and uncompressing intermediate files
d-callan Sep 25, 2024
4237a24
remove unneeded first operator on value channels
d-callan Sep 25, 2024
b160d8f
run_humann true for test
d-callan Sep 25, 2024
3fc30ec
updating some config
d-callan Sep 26, 2024
91d7df5
some chocophlan misspelling
d-callan Sep 26, 2024
88bb41b
omg more chocophlan typos
d-callan Sep 26, 2024
80df04d
fix emit for humann subworkflow
d-callan Sep 26, 2024
cb338e8
handle better paired fastq for humann
d-callan Sep 30, 2024
f58934e
change test version of metaphlan db
d-callan Oct 1, 2024
4e3e615
update humann/join module to use current work dir as input
d-callan Oct 1, 2024
9eb971e
increase test resources
d-callan Oct 1, 2024
39a5500
Merge branch 'humann' of github.com:d-callan/biobakerymgx into humann
d-callan Oct 1, 2024
79fb191
typo in humann module inputs
d-callan Oct 1, 2024
51b768d
Merge branch 'humann' of github.com:d-callan/biobakerymgx into humann
d-callan Oct 1, 2024
2b1cd7b
silly
d-callan Oct 1, 2024
ca95d10
forgot meta in humann/join module
d-callan Oct 1, 2024
c5823ad
typo in humann/renorm
d-callan Oct 1, 2024
c3e9f6d
fix humann_regroup to rxn
d-callan Oct 1, 2024
c15761b
fix join_genes file name pattern
d-callan Oct 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ params {
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_cpus = 4
max_memory = '16.GB'
max_time = '6.h'

// Input data
Expand All @@ -25,6 +25,9 @@ params {
run_kneaddata = true
kneaddata_db_version = 'human_genome'
run_metaphlan = true
metaphlan_db_version = 'mpa_vJan21_TOY_CHOCOPhlAnSGB_202103'
metaphlan_db_version = 'mpa_vOct22_CHOCOPhlAnSGB_202212'
run_humann = true
chocophlan_db_version = 'DEMO'
uniref_db_version = 'DEMO_diamond'

}
6 changes: 6 additions & 0 deletions modules/local/humann/humann/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the humann/humann module.
# Channels are listed in the order conda should search them.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# HUMAnN pinned to 3.8 from bioconda (same version as the container tag used in main.nf).
- bioconda::humann=3.8
58 changes: 58 additions & 0 deletions modules/local/humann/humann/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
 * HUMANN_HUMANN
 *
 * Runs `humann` on the reads of one sample, using a precomputed taxonomic
 * profile plus the nucleotide (ChocoPhlAn) and protein (UniRef) databases.
 *
 * Inputs:
 *   meta / reads               - sample metadata map and its read file(s); the sample is
 *                                treated as paired unless `meta.single_end` is truthy
 *   meta2 / metaphlan_profile  - taxonomic profile passed to `--taxonomic-profile`
 *   chocophlan_db              - path passed to `--nucleotide-database`
 *   uniref_db                  - path passed to `--protein-database`
 *
 * Outputs (all named after `prefix`, which defaults to `meta.id`):
 *   genefamilies, pathabundance, pathcoverage, log, versions
 */
process HUMANN_HUMANN {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0':
        'biocontainers/humann:3.8--pyh7cba7a3_0' }"

    input:
    tuple val(meta) , path(reads)
    tuple val(meta2), path(metaphlan_profile)
    path chocophlan_db
    path uniref_db

    output:
    tuple val(meta), path("*_genefamilies.tsv") , emit: genefamilies
    tuple val(meta), path("*_pathabundance.tsv"), emit: pathabundance
    tuple val(meta), path("*_pathcoverage.tsv") , emit: pathcoverage
    tuple val(meta), path("*.log")              , emit: log
    path "versions.yml"                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def paired = !meta.single_end

    // humann accepts a single --input; repeating the flag keeps only the last
    // value, so one mate would be silently dropped. For paired data the two
    // mates are concatenated into one file first.
    // The .gz suffix is preserved so the merged file keeps the same compression
    // suffix as its inputs (concatenated gzip members form a valid gzip stream).
    // NOTE(review): assumes both mates share the same compression - confirm.
    def merged_ext = paired && reads[0].toString().endsWith('.gz') ? 'fastq.gz' : 'fastq'
    def merged     = "${prefix}_concat.${merged_ext}"
    def merge_cmd  = paired ? "cat ${reads[0]} ${reads[1]} > ${merged}" : ''
    def input_file = paired ? merged : "${reads}"
    """
    ${merge_cmd}

    humann \\
        --input ${input_file} \\
        --output ./ \\
        --output-basename ${prefix} \\
        --threads ${task.cpus} \\
        --taxonomic-profile ${metaphlan_profile} \\
        --nucleotide-database ${chocophlan_db} \\
        --protein-database ${uniref_db} \\
        --o-log ${prefix}.log \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}_genefamilies.tsv
    touch ${prefix}_pathabundance.tsv
    touch ${prefix}_pathcoverage.tsv
    touch ${prefix}.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
    END_VERSIONS
    """
}
27 changes: 27 additions & 0 deletions modules/local/humann/humann/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Publishing and argument settings for the HUMANN_HUMANN process.
process {
    withName: HUMANN_HUMANN {
        // Each per-sample output type is published to the same folder,
        // selected by its own file-name pattern.
        publishDir = [
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*_genefamilies.tsv'
            ],
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*_pathabundance.tsv'
            ],
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*_pathcoverage.tsv'
            ],
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ],
        ]
        // Extra command-line options; empty string when params.humann_options is unset.
        ext.args = params.humann_options ?: ""
    }
}
115 changes: 115 additions & 0 deletions modules/local/humann/humann/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// nf-test specification for the HUMANN_HUMANN process.
// The test data paths, database versions and the expected log line are still
// TODO placeholders and must be filled in before these tests can pass.
nextflow_process {

    name "Test Process HUMANN_HUMANN"
    script "../main.nf"
    process "HUMANN_HUMANN"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "humann"
    tag "humann/humann"

    // Both databases are produced by sibling download modules before the tests run.
    setup {
        run("HUMANN_DOWNLOADCHOCOPHLANDB") {
            script "../../downloadchocophlandb/main.nf"
            process {
                """
                input[0] = 'TODO - some chocophlan db version - or maybe skip this setup and build in a mini test db'
                """
            }
        }

        run("HUMANN_DOWNLOADUNIREFDB") {
            script "../../downloadunirefdb/main.nf"
            process {
                """
                input[0] = 'TODO - some uniref db version - or maybe skip this setup and build in a mini test db'
                """
            }
        }
    }

    test("kneaded fastq.gz") {

        when {
            process {
                """
                input[0] = Channel.of(
                    [
                        [ id: 'test' ],
                        [
                            file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true),
                            file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true)
                        ]
                    ]
                )
                input[1] = Channel.of(
                    [
                        [id: 'test'],
                        [
                            file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true)
                        ]
                    ]
                )
                input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db
                input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db
                """
            }
        }


        then {
            assertAll (
                { assert process.success },
                // Every snapshot argument must be comma-separated; the log is
                // checked separately by content rather than snapshotted.
                { assert snapshot(
                    process.out.genefamilies,
                    process.out.pathabundance,
                    process.out.pathcoverage,
                    process.out.versions
                    ).match()
                },
                { assert path(process.out.log[0][1]).text.contains("TODO a line indicates its running and gives a version, hopefully?") }
            )
        }
    }

    test("kneaded fastq.gz - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = Channel.of(
                    [
                        [ id: 'test' ],
                        [
                            file(params.modules_testdata_base_path + "TODO_some_kneaded_1.fastq.gz", checkIfExists: true),
                            file(params.modules_testdata_base_path + "TODO_some_kneaded_2.fastq.gz", checkIfExists: true)
                        ]
                    ]
                )
                input[1] = Channel.of(
                    [
                        [id: 'test'],
                        [
                            file(params.modules_testdata_base_path + "TODO_some_metaphlan_profile.tsv", checkIfExists: true)
                        ]
                    ]
                )
                input[2] = HUMANN_DOWNLOADCHOCOPHLANDB.out.chocophlan_db
                input[3] = HUMANN_DOWNLOADUNIREFDB.out.uniref_db
                """
            }
        }


        then {
            assertAll (
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
27 changes: 27 additions & 0 deletions modules/local/humann/humann/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Test-time configuration for HUMANN_HUMANN (loaded by tests/main.nf.test).
process {
    withName: HUMANN_HUMANN {
        // One publishDir rule per output type, all targeting the same folder.
        publishDir = [
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*_genefamilies.tsv'
            ],
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*_pathabundance.tsv'
            ],
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*_pathcoverage.tsv'
            ],
            [
                path: { "${params.outdir}/humann/humann" },
                mode: params.publish_dir_mode,
                pattern: '*.log'
            ],
        ]
        // Falls back to an empty string when params.humann_options is unset.
        ext.args = params.humann_options ?: ""
    }
}
6 changes: 6 additions & 0 deletions modules/local/humann/join/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the humann/join module.
# Channels are listed in the order conda should search them.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# HUMAnN pinned to 3.8 from bioconda (same version as the container tag used in main.nf).
- bioconda::humann=3.8
39 changes: 39 additions & 0 deletions modules/local/humann/join/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
 * HUMANN_JOIN
 *
 * Runs `humann_join_tables` over the files staged into the task work
 * directory and writes one `<file_name_pattern>_joined.tsv` table.
 *
 * Inputs:
 *   meta / input        - metadata and the table file(s) to stage; the files are not
 *                         referenced by name, the tool is pointed at the current
 *                         directory (`--input .`) instead
 *   file_name_pattern   - string passed to `--file_name`, also used as the prefix of
 *                         the output file
 *
 * Outputs:
 *   joined   - the `*_joined.tsv` table
 *   versions - versions.yml with the humann version
 */
process HUMANN_JOIN {
    // Tagged by pattern so the separate join tasks can be told apart in the run log.
    tag "$file_name_pattern"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/humann:3.8--pyh7cba7a3_0':
        'biocontainers/humann:3.8--pyh7cba7a3_0' }"

    input:
    tuple val(meta), path(input)
    val file_name_pattern

    output:
    path("*_joined.tsv") , emit: joined
    path "versions.yml"  , emit: versions

    // Same guard as HUMANN_HUMANN so the process can be switched off through ext.when.
    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    """
    humann_join_tables \\
        --input . \\
        --output ${file_name_pattern}_joined.tsv \\
        --file_name $file_name_pattern \\
        ${args}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
    END_VERSIONS
    """

    stub:
    """
    touch ${file_name_pattern}_joined.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        humann: \$(echo \$(humann --version 2>&1 | sed 's/^.*humann //; s/Using.*\$//' ))
    END_VERSIONS
    """
}
12 changes: 12 additions & 0 deletions modules/local/humann/join/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Publishing and argument settings for the HUMANN_JOIN process.
process {
    withName: HUMANN_JOIN {
        publishDir = [
            [
                path: { "${params.outdir}/humann/results" },
                mode: params.publish_dir_mode,
                // HUMANN_JOIN emits uncompressed '*_joined.tsv' files; the previous
                // '*_joined.tsv.gz' pattern matched nothing, so no table was published.
                pattern: '*_joined.tsv'
            ]
        ]
        // NOTE(review): params.humann_options is also what HUMANN_HUMANN receives; options
        // meant for `humann` may not be valid for `humann_join_tables` - confirm.
        ext.args = params.humann_options ? params.humann_options : ""
    }
}
54 changes: 54 additions & 0 deletions modules/local/humann/join/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// nf-test specification for the HUMANN_JOIN process.
// HUMANN_JOIN takes two inputs: a [meta, files] tuple and the file-name pattern.
// The table paths below are TODO placeholders until real test data is available.
nextflow_process {

    name "Test Process HUMANN_JOIN"
    script "../main.nf"
    process "HUMANN_JOIN"
    config "./nextflow.config"

    tag "modules"
    tag "modules_nfcore"
    tag "humann"
    tag "humann/join"

    test("humann/join") {
        when {
            process {
                """
                input[0] = Channel.of(
                    [
                        [ id: 'test' ],
                        [
                            file(params.modules_testdata_base_path + "TODO_some_sample1_genefamilies.tsv", checkIfExists: true),
                            file(params.modules_testdata_base_path + "TODO_some_sample2_genefamilies.tsv", checkIfExists: true)
                        ]
                    ]
                )
                input[1] = "genefamilies"
                """
            }
        }

        then {
            // HUMANN_JOIN has no `log` output channel, so only the joined table
            // and versions are checked.
            assertAll (
                { assert process.success },
                { assert snapshot(
                    process.out.joined,
                    process.out.versions
                    ).match()
                }
            )
        }
    }

    test("humann/join - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = Channel.of(
                    [
                        [ id: 'test' ],
                        [
                            file(params.modules_testdata_base_path + "TODO_some_sample1_genefamilies.tsv", checkIfExists: true),
                            file(params.modules_testdata_base_path + "TODO_some_sample2_genefamilies.tsv", checkIfExists: true)
                        ]
                    ]
                )
                input[1] = "genefamilies"
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }
    }
}
12 changes: 12 additions & 0 deletions modules/local/humann/join/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
// Test-time configuration for HUMANN_JOIN (loaded by tests/main.nf.test).
process {
    withName: HUMANN_JOIN {
        publishDir = [
            [
                path: { "${params.outdir}/humann/results" },
                mode: params.publish_dir_mode,
                // Must match the process output name ('*_joined.tsv'); the earlier
                // '*_joined.tsv.gz' pattern matched no emitted file.
                pattern: '*_joined.tsv'
            ]
        ]
        ext.args = params.humann_options ? params.humann_options : ""
    }
}
6 changes: 6 additions & 0 deletions modules/local/humann/regroup/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Conda environment for the humann/regroup module.
# Channels are listed in the order conda should search them.
channels:
- conda-forge
- bioconda
- defaults
dependencies:
# HUMAnN pinned to 3.8 from the bioconda channel.
- bioconda::humann=3.8
Loading