From 31767ccf9b5d24286e34718caa9541ccdfa79bdb Mon Sep 17 00:00:00 2001 From: prototaxites Date: Mon, 24 Jul 2023 19:55:25 +0000 Subject: [PATCH 01/19] Add galah --- modules/nf-core/galah/main.nf | 57 ++++++++++++++++++ modules/nf-core/galah/meta.yml | 64 +++++++++++++++++++++ tests/config/pytest_modules.yml | 4 ++ tests/modules/nf-core/galah/main.nf | 21 +++++++ tests/modules/nf-core/galah/nextflow.config | 5 ++ tests/modules/nf-core/galah/test.yml | 8 +++ 6 files changed, 159 insertions(+) create mode 100644 modules/nf-core/galah/main.nf create mode 100644 modules/nf-core/galah/meta.yml create mode 100644 tests/modules/nf-core/galah/main.nf create mode 100644 tests/modules/nf-core/galah/nextflow.config create mode 100644 tests/modules/nf-core/galah/test.yml diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf new file mode 100644 index 00000000000..f498663d5a4 --- /dev/null +++ b/modules/nf-core/galah/main.nf @@ -0,0 +1,57 @@ +process GALAH { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::galah=0.3.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/galah:0.3.1--hec16e2b_1': + 'biocontainers/galah:0.3.1--hec16e2b_1' }" + + input: + tuple val(meta), path(bins) + path(checkm_tab_table) + path(genome_info) + + output: + tuple val(meta), path("*.tsv") , emit: tsv + tuple val(meta), path("${prefix}-dereplicated/*") , emit: dereplicated_bins, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def qc_input = checkm_tab_table ? "--checkm-tab-table ${checkm_tab_table}" : (genome_info ? "--genome-info ${genome_info}" : "") + if (checkm_tab_table && genome_info) { error "genome_info table and checkm_tab_table both provided: please provide one or the other." 
} + """ + mkdir ${prefix}-dereplicated + + galah cluster \\ + --threads ${task.cpus} \\ + --genome-fasta-files ${bins} \\ + ${qc_input} \\ + --output-cluster-definition ${prefix}-dereplicated_bins.tsv \\ + ${args} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + : \$(galah --version | sed 's/galah //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix}-dereplicated/ + touch ${prefix}-dereplicated/test.fa + touch ${prefix}-dereplicated_bins.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + : \$(galah --version | sed 's/galah //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/galah/meta.yml b/modules/nf-core/galah/meta.yml new file mode 100644 index 00000000000..15c198424f8 --- /dev/null +++ b/modules/nf-core/galah/meta.yml @@ -0,0 +1,64 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json +name: "galah" +description: Cluster genome FASTA files by average nucleotide identity +keywords: + - genomics + - cluster + - genome + - metagenomics +tools: + - "galah": + description: "Galah aims to be a more scalable metagenome assembled genome (MAG) dereplication method." + homepage: "https://github.com/wwood/galah" + documentation: "https://github.com/wwood/galah" + tool_dev_url: "https://github.com/wwood/galah" + doi: "" + licence: "['GPL v3']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - bins: + type: file + description: A list of fasta-formatted genomes for dereplication + pattern: "*.{fa,fna,fa.gz, etc}" + - checkm_tab_table: + type: file + description: | + (optional) (CheckM)[https://nf-co.re/modules/checkm_lineagewf] summary TSV containing information on the completeness and contamination of the input genomes. + Conflicts with genome_info. 
+ pattern: "*.tsv" + - genome_info: + type: file + description: | + (optional) CSV file with the header genome,completeness,contamination. + `genome` is the name of the genome file, minus the file extension, and `completeness` and + `contamination` are the completeness and contamination of each genome in percentages (0-100). + Alternative to checkm_tab_table if genome QC scores come from another source (e.g. BUSCO). + Conflicts with checkm_tab_table. + pattern: "*.csv" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'test', single_end:false ]` + - tsv: + type: file + description: TSV file in the format `representative_genome` \t `member_genome` + pattern: "*.tsv" + - dereplicated_bins: + type: file + description: (optional, depending on input args) The representative genomes following dereplication by galah. + pattern: "*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@prototaxites" diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 39a14960902..5e8a442b579 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -1243,6 +1243,10 @@ freyja/variants: - modules/nf-core/freyja/variants/** - tests/modules/nf-core/freyja/variants/** +galah: + - modules/nf-core/galah/** + - tests/modules/nf-core/galah/** + gamma/gamma: - modules/nf-core/gamma/gamma/** - tests/modules/nf-core/gamma/gamma/** diff --git a/tests/modules/nf-core/galah/main.nf b/tests/modules/nf-core/galah/main.nf new file mode 100644 index 00000000000..e01e2912295 --- /dev/null +++ b/tests/modules/nf-core/galah/main.nf @@ -0,0 +1,21 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { GALAH } from '../../../../modules/nf-core/galah/main.nf' +include { BIOAWK as BIOAWK_CHECKM } from '../../../../modules/nf-core/bioawk/main.nf' +include { BIOAWK as BIOAWK_GENOMEINFO } from 
'../../../../modules/nf-core/bioawk/main.nf' +include { GUNZIP } from '../../../../modules/nf-core/gunzip/main.nf' + + +workflow test_galah { + + input = [ + [ id:'test' ], // meta map + [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)] + ] + + GALAH ( input, [], [] ) + +} diff --git a/tests/modules/nf-core/galah/nextflow.config b/tests/modules/nf-core/galah/nextflow.config new file mode 100644 index 00000000000..8730f1c4b93 --- /dev/null +++ b/tests/modules/nf-core/galah/nextflow.config @@ -0,0 +1,5 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + +} diff --git a/tests/modules/nf-core/galah/test.yml b/tests/modules/nf-core/galah/test.yml new file mode 100644 index 00000000000..674af2d80ff --- /dev/null +++ b/tests/modules/nf-core/galah/test.yml @@ -0,0 +1,8 @@ +- name: galah test_galah + command: nextflow run ./tests/modules/nf-core/galah -entry test_galah -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/galah/nextflow.config + tags: + - galah + files: + - path: output/galah/test-dereplicated_bins.tsv + md5sum: d2f8a621bfa5794467f4fdd759e2bce7 + - path: output/galah/versions.yml From b066cc2c3350c7c263cf63e03510b8a3caef6b22 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Mon, 24 Jul 2023 20:07:19 +0000 Subject: [PATCH 02/19] NO DOI --- modules/nf-core/galah/meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/galah/meta.yml b/modules/nf-core/galah/meta.yml index 15c198424f8..73ac4b4c21b 100644 --- a/modules/nf-core/galah/meta.yml +++ b/modules/nf-core/galah/meta.yml @@ -13,7 +13,7 @@ tools: homepage: "https://github.com/wwood/galah" documentation: "https://github.com/wwood/galah" tool_dev_url: "https://github.com/wwood/galah" 
- doi: "" + doi: "10.1111/XXXX" licence: "['GPL v3']" input: From 65079b563d4d3ddec6ddcd513e0327890106d93d Mon Sep 17 00:00:00 2001 From: prototaxites Date: Mon, 24 Jul 2023 20:10:53 +0000 Subject: [PATCH 03/19] fix versions --- modules/nf-core/galah/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index f498663d5a4..1c18dd2cbf8 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -37,7 +37,7 @@ process GALAH { cat <<-END_VERSIONS > versions.yml "${task.process}": - : \$(galah --version | sed 's/galah //') + galah: \$(galah --version | sed 's/galah //') END_VERSIONS """ @@ -51,7 +51,7 @@ process GALAH { cat <<-END_VERSIONS > versions.yml "${task.process}": - : \$(galah --version | sed 's/galah //') + galah: \$(galah --version | sed 's/galah //') END_VERSIONS """ } From c7f9abdaf62abeff139402ca76f604f39828bf72 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 25 Jul 2023 07:33:14 +0000 Subject: [PATCH 04/19] fix dashing version in conda --- modules/nf-core/galah/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 1c18dd2cbf8..ae5f849ad03 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -2,7 +2,7 @@ process GALAH { tag "$meta.id" label 'process_medium' - conda "bioconda::galah=0.3.1" + conda "bioconda::galah=0.3.1 bioconda::dashing=0.4.0-3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/galah:0.3.1--hec16e2b_1': 'biocontainers/galah:0.3.1--hec16e2b_1' }" From 5e4c0af1cea9d59fcf94b0447dad709874d33b8e Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 25 Jul 2023 07:37:26 +0000 Subject: [PATCH 05/19] fix inputs/outputs --- modules/nf-core/galah/main.nf | 4 ++-- modules/nf-core/galah/meta.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index ae5f849ad03..f1c62924a63 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -14,7 +14,7 @@ process GALAH { output: tuple val(meta), path("*.tsv") , emit: tsv - tuple val(meta), path("${prefix}-dereplicated/*") , emit: dereplicated_bins, optional: true + tuple val(meta), path("${prefix}-dereplicated/*") , emit: dereplicated_bins path "versions.yml" , emit: versions when: @@ -33,7 +33,7 @@ process GALAH { --genome-fasta-files ${bins} \\ ${qc_input} \\ --output-cluster-definition ${prefix}-dereplicated_bins.tsv \\ - ${args} + --output-representative-fasta-directory ${prefix}-dereplicated cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/galah/meta.yml b/modules/nf-core/galah/meta.yml index 73ac4b4c21b..c999cd0395d 100644 --- a/modules/nf-core/galah/meta.yml +++ b/modules/nf-core/galah/meta.yml @@ -54,7 +54,7 @@ output: pattern: "*.tsv" - dereplicated_bins: type: file - description: (optional, depending on input args) The representative genomes following dereplication by galah. + description: The representative genomes following dereplication by galah. 
pattern: "*" - versions: type: file From 5b672307a637642fe29db240fa43620e0b7151fc Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 25 Jul 2023 07:46:59 +0000 Subject: [PATCH 06/19] fix dashing version --- modules/nf-core/galah/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index f1c62924a63..006914df4e2 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -2,7 +2,7 @@ process GALAH { tag "$meta.id" label 'process_medium' - conda "bioconda::galah=0.3.1 bioconda::dashing=0.4.0-3" + conda "bioconda::galah=0.3.1 bioconda::dashing=0.4.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/galah:0.3.1--hec16e2b_1': 'biocontainers/galah:0.3.1--hec16e2b_1' }" From f3feb1cefd135bd06ddabd3dce5df52ac436f566 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 25 Jul 2023 18:09:36 +0000 Subject: [PATCH 07/19] Update model inputs, add tests --- modules/nf-core/galah/main.nf | 10 ++++---- modules/nf-core/galah/meta.yml | 27 ++++++++++---------- tests/modules/nf-core/galah/main.nf | 28 ++++++++++++++++++--- tests/modules/nf-core/galah/nextflow.config | 7 ++++++ tests/modules/nf-core/galah/test.yml | 23 +++++++++++++++++ 5 files changed, 72 insertions(+), 23 deletions(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 006914df4e2..6effe726fb4 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -2,15 +2,15 @@ process GALAH { tag "$meta.id" label 'process_medium' - conda "bioconda::galah=0.3.1 bioconda::dashing=0.4.0" + conda "bioconda::galah=0.3.1 bioconda::dashing=0.4.0 bioconda::fastani=1.31" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/galah:0.3.1--hec16e2b_1': 'biocontainers/galah:0.3.1--hec16e2b_1' }" input: tuple val(meta), path(bins) - path(checkm_tab_table) - path(genome_info) + path(qc_table) + val(qc_format) output: tuple val(meta), path("*.tsv") , emit: tsv @@ -23,8 +23,8 @@ process GALAH { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def qc_input = checkm_tab_table ? "--checkm-tab-table ${checkm_tab_table}" : (genome_info ? "--genome-info ${genome_info}" : "") - if (checkm_tab_table && genome_info) { error "genome_info table and checkm_tab_table both provided: please provide one or the other." } + def qc_input = qc_table ? ((qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") : "" + if( qc_table && !(qc_format in ["checkm", "genome_info"]) ) error "Invalid qc_format supplied!" """ mkdir ${prefix}-dereplicated diff --git a/modules/nf-core/galah/meta.yml b/modules/nf-core/galah/meta.yml index c999cd0395d..85263a8f591 100644 --- a/modules/nf-core/galah/meta.yml +++ b/modules/nf-core/galah/meta.yml @@ -13,7 +13,7 @@ tools: homepage: "https://github.com/wwood/galah" documentation: "https://github.com/wwood/galah" tool_dev_url: "https://github.com/wwood/galah" - doi: "10.1111/XXXX" + doi: "10.1111/NODOI" licence: "['GPL v3']" input: @@ -26,21 +26,20 @@ input: type: file description: A list of fasta-formatted genomes for dereplication pattern: "*.{fa,fna,fa.gz, etc}" - - checkm_tab_table: + - qc_table: type: file description: | - (optional) (CheckM)[https://nf-co.re/modules/checkm_lineagewf] summary TSV containing information on the completeness and contamination of the input genomes. - Conflicts with genome_info. - pattern: "*.tsv" - - genome_info: - type: file - description: | - (optional) CSV file with the header genome,completeness,contamination. 
- `genome` is the name of the genome file, minus the file extension, and `completeness` and - `contamination` are the completeness and contamination of each genome in percentages (0-100). - Alternative to checkm_tab_table if genome QC scores come from another source (e.g. BUSCO). - Conflicts with checkm_tab_table. - pattern: "*.csv" + (optional) Either a [CheckM](https://nf-co.re/modules/checkm_lineagewf) summary TSV containing + information on the completeness and contamination of the input genomes (13 columns), + or a 3-column csv with the header `genome,completeness,contamination`. + In both cases the first column should contain the names of the input genome files, + minus the last file extension + (i.e. if the genome is gzipped, the genome name should retain the .fasta extension). + pattern: "*.{csv,tsv}" + - qc_format: + type: string + description: Defines the type of input table in `qc_table`, if specified. + pattern: "checkm|genome_info" output: - meta: diff --git a/tests/modules/nf-core/galah/main.nf b/tests/modules/nf-core/galah/main.nf index e01e2912295..d260cc67ed0 100644 --- a/tests/modules/nf-core/galah/main.nf +++ b/tests/modules/nf-core/galah/main.nf @@ -1,13 +1,9 @@ -#!/usr/bin/env nextflow - -nextflow.enable.dsl = 2 include { GALAH } from '../../../../modules/nf-core/galah/main.nf' include { BIOAWK as BIOAWK_CHECKM } from '../../../../modules/nf-core/bioawk/main.nf' include { BIOAWK as BIOAWK_GENOMEINFO } from '../../../../modules/nf-core/bioawk/main.nf' include { GUNZIP } from '../../../../modules/nf-core/gunzip/main.nf' - workflow test_galah { input = [ @@ -19,3 +15,27 @@ workflow test_galah { GALAH ( input, [], [] ) } + +workflow test_galah_genomeinfo { + + input = [ + [ id:'test' ], // meta map + [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", 
checkIfExists: true)] + ] + + genomeinfo = [ + [ id: 'genomeinfo' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/magmap/testdata/checkm.lineage_wf.qa_2.tsv", checkIfExists: true) + ] + + BIOAWK_GENOMEINFO(genomeinfo) + + GUNZIP(BIOAWK_GENOMEINFO.out.output) + + ch_genomeinfo = GUNZIP.out.gunzip + .map { meta, tsv -> [tsv] } + + GALAH ( input, ch_genomeinfo, "genome_info") + +} diff --git a/tests/modules/nf-core/galah/nextflow.config b/tests/modules/nf-core/galah/nextflow.config index 8730f1c4b93..8eacddfbc81 100644 --- a/tests/modules/nf-core/galah/nextflow.config +++ b/tests/modules/nf-core/galah/nextflow.config @@ -2,4 +2,11 @@ process { publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + // write a horrid awk to munge the checkm_qa tsv to a 3 column csv + // as files are gzipped, genome has to include extension "fna" as galah expects the genome name to be + // the file name minus the last extension + withName: BIOAWK_GENOMEINFO { + ext.args = '\'BEGIN {{FS="\t"; OFS=","}} NR==1 {print "genome","completeness","contamination"} NR>1 {print $1".fna",$6, $7}\'' + } + } diff --git a/tests/modules/nf-core/galah/test.yml b/tests/modules/nf-core/galah/test.yml index 674af2d80ff..9eec3cc7eb7 100644 --- a/tests/modules/nf-core/galah/test.yml +++ b/tests/modules/nf-core/galah/test.yml @@ -3,6 +3,29 @@ tags: - galah files: + - path: output/galah/test-dereplicated/GCA_002688505.1_ASM268850v1_genomic.fna.gz + md5sum: 0747c48f6693a4fb03c7164c2f472326 + - path: output/galah/test-dereplicated/GCF_004296495.1_ASM429649v1_genomic.fna.gz + md5sum: a8e9bac598df938f25e09418ff7214dd - path: output/galah/test-dereplicated_bins.tsv md5sum: d2f8a621bfa5794467f4fdd759e2bce7 - path: output/galah/versions.yml + +- name: galah test_galah_genomeinfo + command: nextflow run ./tests/modules/nf-core/galah -entry test_galah_genomeinfo -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/galah/nextflow.config + tags: + - 
galah + files: + - path: output/bioawk/genomeinfo.gz + md5sum: 7c7c29f7d08468cb67dfb5635aa144db + - path: output/bioawk/versions.yml + - path: output/galah/test-dereplicated/GCA_002688505.1_ASM268850v1_genomic.fna.gz + md5sum: 0747c48f6693a4fb03c7164c2f472326 + - path: output/galah/test-dereplicated/GCF_004296495.1_ASM429649v1_genomic.fna.gz + md5sum: a8e9bac598df938f25e09418ff7214dd + - path: output/galah/test-dereplicated_bins.tsv + md5sum: d2f8a621bfa5794467f4fdd759e2bce7 + - path: output/galah/versions.yml + - path: output/gunzip/genomeinfo + md5sum: f73b9131ab91ddb754725b94e5085955 + - path: output/gunzip/versions.yml From a59006a07d745c89857947904b705dd7eb3f9978 Mon Sep 17 00:00:00 2001 From: prototaxites Date: Tue, 25 Jul 2023 18:10:49 +0000 Subject: [PATCH 08/19] Update error message. --- modules/nf-core/galah/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 6effe726fb4..af95804bd3f 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -24,7 +24,7 @@ process GALAH { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def qc_input = qc_table ? ((qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") : "" - if( qc_table && !(qc_format in ["checkm", "genome_info"]) ) error "Invalid qc_format supplied!" + if( qc_table && !(qc_format in ["checkm", "genome_info"]) ) error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'." 
""" mkdir ${prefix}-dereplicated From cb6916c60fba04ec291a3181a10e3e3e6fce15da Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Thu, 27 Jul 2023 08:26:55 +0100 Subject: [PATCH 09/19] Update main.nf --- modules/nf-core/galah/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index af95804bd3f..0eb4a59bc70 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -2,10 +2,10 @@ process GALAH { tag "$meta.id" label 'process_medium' - conda "bioconda::galah=0.3.1 bioconda::dashing=0.4.0 bioconda::fastani=1.31" + conda "bioconda::galah=0.3.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/galah:0.3.1--hec16e2b_1': - 'biocontainers/galah:0.3.1--hec16e2b_1' }" + 'https://depot.galaxyproject.org/singularity/galah%3A0.3.1--h031d066_3': + 'biocontainers/galah:0.3.1--h031d066_3' }" input: tuple val(meta), path(bins) From 798e7550d624f9bff0aa8dc6401a67f64717c7b2 Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Thu, 27 Jul 2023 08:48:54 +0100 Subject: [PATCH 10/19] Update test.yml --- tests/modules/nf-core/galah/test.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/modules/nf-core/galah/test.yml b/tests/modules/nf-core/galah/test.yml index 9eec3cc7eb7..9075894f138 100644 --- a/tests/modules/nf-core/galah/test.yml +++ b/tests/modules/nf-core/galah/test.yml @@ -16,8 +16,6 @@ tags: - galah files: - - path: output/bioawk/genomeinfo.gz - md5sum: 7c7c29f7d08468cb67dfb5635aa144db - path: output/bioawk/versions.yml - path: output/galah/test-dereplicated/GCA_002688505.1_ASM268850v1_genomic.fna.gz md5sum: 0747c48f6693a4fb03c7164c2f472326 From 6121ca06ff2a0c0fecdafe0143515e976369fbf1 Mon Sep 17 00:00:00 2001 From: Jim Downie 
<19718667+prototaxites@users.noreply.github.com> Date: Thu, 3 Aug 2023 14:24:20 +0100 Subject: [PATCH 11/19] Update main.nf --- modules/nf-core/galah/main.nf | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 0eb4a59bc70..7edd2cb7c27 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -23,8 +23,10 @@ process GALAH { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def qc_input = qc_table ? ((qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") : "" - if( qc_table && !(qc_format in ["checkm", "genome_info"]) ) error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'." + def qc_args = (qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") + def qc_input = qc_table ? qc_args : "" + def valid_qc_format = qc_format in ["checkm", "genome_info"]) + if( qc_table && !valid_qc_format ) error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'." """ mkdir ${prefix}-dereplicated From 430c0cbdb37f89b2b898c677371d64d8440c1e5a Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:02:13 +0100 Subject: [PATCH 12/19] Update modules/nf-core/galah/main.nf Co-authored-by: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> --- modules/nf-core/galah/main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 7edd2cb7c27..7038b7b4c4a 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -26,7 +26,9 @@ process GALAH { def qc_args = (qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") def qc_input = qc_table ? 
qc_args : "" def valid_qc_format = qc_format in ["checkm", "genome_info"]) - if( qc_table && !valid_qc_format ) error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'." + if( qc_table && !valid_qc_format ) { + error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'." + } """ mkdir ${prefix}-dereplicated From 08fed34ec0ece30c6506f02e791c5fc060756743 Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:33:13 +0100 Subject: [PATCH 13/19] Update main.nf --- modules/nf-core/galah/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 7038b7b4c4a..215f624c830 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -25,7 +25,7 @@ process GALAH { prefix = task.ext.prefix ?: "${meta.id}" def qc_args = (qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") def qc_input = qc_table ? qc_args : "" - def valid_qc_format = qc_format in ["checkm", "genome_info"]) + def valid_qc_format = qc_format in ["checkm", "genome_info"] if( qc_table && !valid_qc_format ) { error "Invalid qc_format supplied! qc_format should be either 'checkm' or 'genome_info'." 
} From 390fe417201546280320450802d7c78a9b4b0aec Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Fri, 4 Aug 2023 10:33:32 +0100 Subject: [PATCH 14/19] Update modules/nf-core/galah/main.nf Co-authored-by: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> --- modules/nf-core/galah/main.nf | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index 215f624c830..cef43a42c10 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -8,9 +8,7 @@ process GALAH { 'biocontainers/galah:0.3.1--h031d066_3' }" input: - tuple val(meta), path(bins) - path(qc_table) - val(qc_format) + tuple val(meta), path(bins), path(qc_table), val(qc_format) output: tuple val(meta), path("*.tsv") , emit: tsv From ed6c688e9a00185fb76c9ac5ec97936a2857052f Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Fri, 4 Aug 2023 12:26:11 +0100 Subject: [PATCH 15/19] Update main.nf --- modules/nf-core/galah/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/nf-core/galah/main.nf b/modules/nf-core/galah/main.nf index cef43a42c10..19721a5d684 100644 --- a/modules/nf-core/galah/main.nf +++ b/modules/nf-core/galah/main.nf @@ -21,7 +21,7 @@ process GALAH { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def qc_args = (qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}") + def qc_args = (qc_format == "checkm") ? "--checkm-tab-table ${qc_table}" : "--genome-info ${qc_table}" def qc_input = qc_table ? 
qc_args : "" def valid_qc_format = qc_format in ["checkm", "genome_info"] if( qc_table && !valid_qc_format ) { From daaff42ba7a25c8ea98837a5b1782da6b989f53d Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Mon, 7 Aug 2023 10:05:34 +0100 Subject: [PATCH 16/19] Update test --- tests/modules/nf-core/galah/main.nf | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/tests/modules/nf-core/galah/main.nf b/tests/modules/nf-core/galah/main.nf index d260cc67ed0..2839a7f9452 100644 --- a/tests/modules/nf-core/galah/main.nf +++ b/tests/modules/nf-core/galah/main.nf @@ -9,21 +9,17 @@ workflow test_galah { input = [ [ id:'test' ], // meta map [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)] + file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)], + [], + [] ] - GALAH ( input, [], [] ) + GALAH ( input ) } workflow test_galah_genomeinfo { - input = [ - [ id:'test' ], // meta map - [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)] - ] - genomeinfo = [ [ id: 'genomeinfo' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/magmap/testdata/checkm.lineage_wf.qa_2.tsv", checkIfExists: true) @@ -36,6 +32,16 @@ workflow test_galah_genomeinfo { ch_genomeinfo = GUNZIP.out.gunzip .map { meta, tsv -> [tsv] } - GALAH ( input, ch_genomeinfo, "genome_info") + input = [ + [ id:'test' ], // meta map + 
[file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)], + ch_genomeinfo, + "genome_info" + ] + + + + GALAH ( input ) } From cc26bc3de28f89333858aee0f19aa34378471901 Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Mon, 7 Aug 2023 10:18:17 +0100 Subject: [PATCH 17/19] Update main.nf --- tests/modules/nf-core/galah/main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/modules/nf-core/galah/main.nf b/tests/modules/nf-core/galah/main.nf index 2839a7f9452..a5ebdd68952 100644 --- a/tests/modules/nf-core/galah/main.nf +++ b/tests/modules/nf-core/galah/main.nf @@ -35,12 +35,12 @@ workflow test_galah_genomeinfo { input = [ [ id:'test' ], // meta map [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)], - ch_genomeinfo, - "genome_info" + file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)] ] - - + .combine(ch_genomeinfo) + .map {meta, bins, qc -> + [ meta, bins, qc, "genome_info" ] + } GALAH ( input ) From 52dcde22da278ba480585ee0482170ab6a8e194a Mon Sep 17 00:00:00 2001 From: Jim Downie <19718667+prototaxites@users.noreply.github.com> Date: Mon, 7 Aug 2023 10:33:46 +0100 Subject: [PATCH 18/19] Update main.nf --- tests/modules/nf-core/galah/main.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/modules/nf-core/galah/main.nf b/tests/modules/nf-core/galah/main.nf index a5ebdd68952..35ac44ec506 100644 --- a/tests/modules/nf-core/galah/main.nf +++ 
b/tests/modules/nf-core/galah/main.nf @@ -20,10 +20,10 @@ workflow test_galah { workflow test_galah_genomeinfo { - genomeinfo = [ - [ id: 'genomeinfo' ], - file("https://raw.githubusercontent.com/nf-core/test-datasets/magmap/testdata/checkm.lineage_wf.qa_2.tsv", checkIfExists: true) - ] + genomeinfo = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/magmap/testdata/checkm.lineage_wf.qa_2.tsv", checkIfExists: true) + .map { file -> + [ [id: "genomeinfo], file ] + } BIOAWK_GENOMEINFO(genomeinfo) From 673e2f04740072c7f8d0eb48cc40f700be1dd9de Mon Sep 17 00:00:00 2001 From: prototaxites Date: Mon, 7 Aug 2023 10:21:36 +0000 Subject: [PATCH 19/19] fix tests --- tests/modules/nf-core/galah/main.nf | 18 ++++++++++-------- tests/modules/nf-core/galah/nextflow.config | 1 + tests/modules/nf-core/galah/test.yml | 3 ++- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/tests/modules/nf-core/galah/main.nf b/tests/modules/nf-core/galah/main.nf index 35ac44ec506..dba70de9169 100644 --- a/tests/modules/nf-core/galah/main.nf +++ b/tests/modules/nf-core/galah/main.nf @@ -10,7 +10,7 @@ workflow test_galah { [ id:'test' ], // meta map [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)], - [], + [], [] ] @@ -22,21 +22,23 @@ workflow test_galah_genomeinfo { genomeinfo = Channel.fromPath("https://raw.githubusercontent.com/nf-core/test-datasets/magmap/testdata/checkm.lineage_wf.qa_2.tsv", checkIfExists: true) .map { file -> - [ [id: "genomeinfo], file ] + [ [id: "genomeinfo"], file ] } BIOAWK_GENOMEINFO(genomeinfo) GUNZIP(BIOAWK_GENOMEINFO.out.output) - + ch_genomeinfo = GUNZIP.out.gunzip .map { meta, tsv -> [tsv] } - input = [ - [ id:'test' ], // meta map - 
[file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), - file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)] - ] + input = Channel.of( + [ + [ id:'test' ], // meta map + [file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCA_002688505.1_ASM268850v1_genomic.fna.gz", checkIfExists: true), + file("https://github.com/nf-core/test-datasets/raw/magmap/testdata/GCF_004296495.1_ASM429649v1_genomic.fna.gz", checkIfExists: true)] + ] + ) .combine(ch_genomeinfo) .map {meta, bins, qc -> [ meta, bins, qc, "genome_info" ] diff --git a/tests/modules/nf-core/galah/nextflow.config b/tests/modules/nf-core/galah/nextflow.config index 8eacddfbc81..8cae6f9910f 100644 --- a/tests/modules/nf-core/galah/nextflow.config +++ b/tests/modules/nf-core/galah/nextflow.config @@ -7,6 +7,7 @@ process { // the file name minus the last extension withName: BIOAWK_GENOMEINFO { ext.args = '\'BEGIN {{FS="\t"; OFS=","}} NR==1 {print "genome","completeness","contamination"} NR>1 {print $1".fna",$6, $7}\'' + ext.prefix = "genome_info.tsv" } } diff --git a/tests/modules/nf-core/galah/test.yml b/tests/modules/nf-core/galah/test.yml index 9075894f138..47f9e900e96 100644 --- a/tests/modules/nf-core/galah/test.yml +++ b/tests/modules/nf-core/galah/test.yml @@ -16,6 +16,7 @@ tags: - galah files: + - path: output/bioawk/genome_info.tsv.gz - path: output/bioawk/versions.yml - path: output/galah/test-dereplicated/GCA_002688505.1_ASM268850v1_genomic.fna.gz md5sum: 0747c48f6693a4fb03c7164c2f472326 @@ -24,6 +25,6 @@ - path: output/galah/test-dereplicated_bins.tsv md5sum: d2f8a621bfa5794467f4fdd759e2bce7 - path: output/galah/versions.yml - - path: output/gunzip/genomeinfo + - path: output/gunzip/genome_info.tsv md5sum: f73b9131ab91ddb754725b94e5085955 - path: output/gunzip/versions.yml