From d6213c4f749f848d129de6ddf06bd77cd7c14ec8 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 23 Oct 2023 16:56:38 -0400 Subject: [PATCH 1/4] use cached reference data by default --- CHANGELOG.md | 3 ++- conf/test_fetchimgt.config | 1 + main.nf | 2 +- nextflow.config | 5 +++-- nextflow_schema.json | 11 ++++++++--- subworkflows/local/vdj_annotation.nf | 8 ++++---- 6 files changed, 19 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 702d7d32..a83a325e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -225,7 +225,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#114](https://github.com/nf-core/bcellmagic/pull/114): Added Bcellmagic html report. - [#114](https://github.com/nf-core/bcellmagic/pull/114): Improved documentation on amplicon protocol support. - [#115](https://github.com/nf-core/bcellmagic/pull/115): Improved output file structure and documentation. -- [#124](https://github.com/nf-core/bcellmagic/pull/124): Template update to nf-core tools v2.0.1 +- [#124](https://github.com/nf-core/bcellmagic/pull/124): Template update to nf-core tools v2.0.1. ### `Fixed` @@ -236,6 +236,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#104](https://github.com/nf-core/bcellmagic/pull/104): Fix bug in pairseq barcode copy before consensus. - [#114](https://github.com/nf-core/bcellmagic/pull/114): Analysis not restricted to Ig heavy chains. - [#123](https://github.com/nf-core/bcellmagic/pull/123): Fix report Rmarkdown reading for running on AWS. +- Use cached IMGT and IgBlast reference data by default. 
### `Dependencies` diff --git a/conf/test_fetchimgt.config b/conf/test_fetchimgt.config index 81500d7f..e223d687 100644 --- a/conf/test_fetchimgt.config +++ b/conf/test_fetchimgt.config @@ -23,6 +23,7 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/Metadata_test_airr.tsv' cprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/C_primers.fasta' vprimers = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/testdata-bcr/V_primers.fasta' + fetch_imgt = true mode = 'fastq' diff --git a/main.nf b/main.nf index d5ad4496..24de2277 100644 --- a/main.nf +++ b/main.nf @@ -30,7 +30,7 @@ if (params.help) { // Validate input parameters if (params.validate_params) { - validateParameters() + validateParameters() } WorkflowMain.initialise(workflow, params, log) diff --git a/nextflow.config b/nextflow.config index 536e9d68..1b772655 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,8 +60,9 @@ params { // ----------------------- productive_only = true reassign = true - igblast_base = null - imgtdb_base = null + igblast_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/igblast_base.zip' + imgtdb_base = 'https://raw.githubusercontent.com/nf-core/test-datasets/airrflow/database-cache/imgtdb_base.zip' + fetch_imgt = false save_databases = true // ----------------------- diff --git a/nextflow_schema.json b/nextflow_schema.json index 1468cdf1..7117deb4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -45,7 +45,7 @@ }, "miairr": { "type": "string", - "default": "bcellmagic/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", + "default": "airrflow/assets/reveal/mapping_MiAIRR_BioSample_v1.3.1.tsv", "description": "Path to MiAIRR-BioSample mapping", "fa_icon": "fas fa-table" } @@ -286,14 +286,19 @@ "imgtdb_base": { "type": "string", "description": "Path to the cached IMGT database.", - "help_text": "If it is not provided, the database 
will be newly downloaded.", + "help_text": "By default, we provide a pre-downloaded version of the IMGT database. It is also possible to provide a custom IMGT reference database. To fetch a fresh version of IMGT, set the `--fetch_imgt` parameter instead.", "fa_icon": "fas fa-database" }, "igblast_base": { "type": "string", "description": "Path to the cached igblast database.", - "help_text": "If it is not provided, the database will be newly downloaded.", + "help_text": "By default, we provide a pre-downloaded version of the IgBLAST database. It is also possible to provide a custom IgBLAST reference database. To fetch fresh reference data at runtime, set the `--fetch_imgt` parameter instead.", "fa_icon": "fas fa-database" + }, + "fetch_imgt": { + "type": "boolean", + "description": "Set this flag to fetch the IMGT reference data at runtime.", + "fa_icon": "fas fa-cloud-download-alt" } }, "fa_icon": "fas fa-edit" diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf index 31ed0fd6..d3f14711 100644 --- a/subworkflows/local/vdj_annotation.nf +++ b/subworkflows/local/vdj_annotation.nf @@ -25,7 +25,7 @@ workflow VDJ_ANNOTATION { // TODO: this can take a long time, and the progress shows 0%. Would be // nice to have some better progress reporting. // And maybe run this as 2 separate steps, one for IMGT and one for IgBLAST? 
- if( params.igblast_base ){ + if( !params.fetch_imgt ){ if (params.igblast_base.endsWith(".zip")) { Channel.fromPath("${params.igblast_base}") .ifEmpty{ error "IGBLAST DB not found: ${params.igblast_base}" } @@ -40,7 +40,7 @@ workflow VDJ_ANNOTATION { } } - if( params.imgtdb_base ){ + if( !params.fetch_imgt ){ if (params.imgtdb_base.endsWith(".zip")) { Channel.fromPath("${params.imgtdb_base}") .ifEmpty{ error "IMGTDB not found: ${params.imgtdb_base}" } @@ -50,12 +50,12 @@ workflow VDJ_ANNOTATION { ch_versions = ch_versions.mix(UNZIP_IMGT.out.versions.ifEmpty(null)) } else { Channel.fromPath("${params.imgtdb_base}") - .ifEmpty { error "IMGTDB not found: ${params.imgtdb_base}" } + .ifEmpty { error "IMGT DB not found: ${params.imgtdb_base}" } .set { ch_imgt } } } - if (!params.igblast_base | !params.imgtdb_base) { + if (params.fetch_imgt) { FETCH_DATABASES() ch_igblast = FETCH_DATABASES.out.igblast ch_imgt = FETCH_DATABASES.out.imgt From 4cd4a2edb3a50c40d2fcff6efa56e21eddddf567 Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 23 Oct 2023 16:58:35 -0400 Subject: [PATCH 2/4] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a83a325e..030ac0b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -236,7 +236,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - [#104](https://github.com/nf-core/bcellmagic/pull/104): Fix bug in pairseq barcode copy before consensus. - [#114](https://github.com/nf-core/bcellmagic/pull/114): Analysis not restricted to Ig heavy chains. - [#123](https://github.com/nf-core/bcellmagic/pull/123): Fix report Rmarkdown reading for running on AWS. -- Use cached IMGT and IgBlast reference data by default. +- [#284](https://github.com/nf-core/bcellmagic/pull/284): Use cached IMGT and IgBlast reference data by default. 
### `Dependencies` From 7da8d0b0fb1585c78b6067da182e341b9630ac2e Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 23 Oct 2023 17:14:17 -0400 Subject: [PATCH 3/4] remove unused igblast module --- modules/local/igblast/igblast_assigngenes.nf | 65 -------------------- subworkflows/local/vdj_annotation.nf | 1 - 2 files changed, 66 deletions(-) delete mode 100644 modules/local/igblast/igblast_assigngenes.nf diff --git a/modules/local/igblast/igblast_assigngenes.nf b/modules/local/igblast/igblast_assigngenes.nf deleted file mode 100644 index b4ea2c35..00000000 --- a/modules/local/igblast/igblast_assigngenes.nf +++ /dev/null @@ -1,65 +0,0 @@ -process IGBLAST_ASSIGNGENES { - tag "$meta.id" - label 'process_low' - label 'immcantation' - - conda "bioconda::changeo=1.3.0 bioconda::igblast=1.19.0 conda-forge::wget=1.20.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' : - 'biocontainers/mulled-v2-7d8e418eb73acc6a80daea8e111c94cf19a4ecfd:00534555924705cdf2f7ac48b4b8b4083527ca58-1' }" - - input: - tuple val(meta), path(reads) // reads in fasta format - path(igblast) // igblast fasta - - output: - tuple val(meta), path("*db-pass.tsv"), emit: tab - path "versions.yml" , emit: versions - path("*_command_log.txt"), emit: logs //process logs - path("*_makedb_command_log.txt"), emit: makedb_log - - script: - def args = task.ext.args ?: '' - """ - igblastn \ - -germline_db_V igblast_base/database/imgt_${meta.species}_${meta.locus.toLowerCase()}_v \ - -germline_db_D igblast_base/database/imgt_${meta.species}_${meta.locus.toLowerCase()}_d \ - -germline_db_J igblast_base/database/imgt_${meta.species}_${meta.locus.toLowerCase()}_j \ - -auxiliary_data igblast_base/optional_file/${meta.species}_gl.aux \ - -organism ${meta.species} \ - $args \ - -query $reads \ - -out 
${meta.id}_db-pass.tsv - - echo "START> AssignGenes" > ${meta.id}_changeo_assigngenes_command_log.txt - echo "COMMAND> igblast" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "VERSION> \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" )" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "FILE> ${reads}" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "ORGANISM> ${meta.species}" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "LOCI> ${meta.locus.toLowerCase()}" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "NPROC> ${task.cpus}\n" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "PROGRESS> ...Done \n" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "PASS> \$(tail -n +2 ${meta.id}_db-pass.tsv | wc -l )" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "OUTPUT> ${meta.id}_igblast.fmt7" >> ${meta.id}_changeo_assigngenes_command_log.txt - echo "END> AssignGenes" >> ${meta.id}_changeo_assigngenes_command_log.txt - - echo "START> MakeDB" > ${meta.id}_makedb_command_log.txt - echo "COMMAND> igblast" >> ${meta.id}_makedb_command_log.txt - echo "ALIGNER_FILE> ${meta.id}_igblast.fmt7" >> ${meta.id}_makedb_command_log.txt - echo "SEQ_FILE> ${reads}" >> ${meta.id}_makedb_command_log.txt - echo "ASIS_ID> False" >> ${meta.id}_makedb_command_log.txt - echo "ASIS_CALLS> False" >> ${meta.id}_makedb_command_log.txt - echo "VALIDATE> strict" >> ${meta.id}_makedb_command_log.txt - echo "EXTENDED> True" >> ${meta.id}_makedb_command_log.txt - echo "INFER_JUNCTION> False\n" >> ${meta.id}_makedb_command_log.txt - echo "PROGRESS> ...\n" >> ${meta.id}_makedb_command_log.txt - echo "PROGRESS> ... 
Done\n" >> ${meta.id}_makedb_command_log.txt - echo "OUTPUT> ${meta.id}_db-pass.tsv" >> ${meta.id}_makedb_command_log.txt - echo "PASS> \$(tail -n +2 ${meta.id}_db-pass.tsv | wc -l )" >> ${meta.id}_makedb_command_log.txt - echo "FAIL> 0" >> ${meta.id}_makedb_command_log.txt - echo "END> MakeDB" >> ${meta.id}_makedb_command_log.txt - - echo "\"${task.process}\":" > versions.yml - echo " igblastn: \$( igblastn -version | grep -o "igblast[0-9\\. ]\\+" | grep -o "[0-9\\. ]\\+" )" >> versions.yml - """ -} diff --git a/subworkflows/local/vdj_annotation.nf b/subworkflows/local/vdj_annotation.nf index d3f14711..d30375c6 100644 --- a/subworkflows/local/vdj_annotation.nf +++ b/subworkflows/local/vdj_annotation.nf @@ -4,7 +4,6 @@ include { UNZIP_DB as UNZIP_IMGT } from '../../modules/local/unzip_db' include { CHANGEO_ASSIGNGENES } from '../../modules/local/changeo/changeo_assigngenes' include { CHANGEO_MAKEDB } from '../../modules/local/changeo/changeo_makedb' include { CHANGEO_PARSEDB_SPLIT } from '../../modules/local/changeo/changeo_parsedb_split' -include { IGBLAST_ASSIGNGENES } from '../../modules/local/igblast/igblast_assigngenes' // reveal include { FILTER_QUALITY } from '../../modules/local/reveal/filter_quality' include { FILTER_JUNCTION_MOD3 } from '../../modules/local/reveal/filter_junction_mod3' From f695dd850f92ca69f34262faf64df33a80a2555f Mon Sep 17 00:00:00 2001 From: Gisela Gabernet Date: Mon, 23 Oct 2023 17:41:03 -0400 Subject: [PATCH 4/4] CI with immcantation container only PR to dev --- .github/workflows/ci_immcantation.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/ci_immcantation.yml b/.github/workflows/ci_immcantation.yml index b2b1dff9..d74bb5f9 100644 --- a/.github/workflows/ci_immcantation.yml +++ b/.github/workflows/ci_immcantation.yml @@ -1,9 +1,6 @@ name: nf-core CI immcantation # This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: - push: - branches: - - 
dev pull_request: branches-ignore: - "master"