From 0a096b57273f15fe30fa745186ec3253ad94906e Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Mon, 13 May 2024 19:13:07 +0800 Subject: [PATCH 1/3] allow skip raw conversion --- bin/diann_convert.py | 24 +++++++++++++----------- modules/local/diannconvert/main.nf | 3 ++- modules/local/pmultiqc/main.nf | 2 +- nextflow.config | 1 + nextflow_schema.json | 7 +++++++ workflows/quantms.nf | 26 +++++++++++++++++++------- 6 files changed, 43 insertions(+), 20 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index fb2bdd22..f98b4a52 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -39,12 +39,13 @@ def cli(): @click.option("--folder", "-f") @click.option("--exp_design", "-d") @click.option("--diann_version", "-v") +@click.option("--skip_mzTab", "-s") @click.option("--dia_params", "-p") @click.option("--charge", "-c") @click.option("--missed_cleavages", "-m") @click.option("--qvalue_threshold", "-q", type=float) @click.pass_context -def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_cleavages, qvalue_threshold): +def convert(ctx, folder, exp_design, dia_params, diann_version, skip_mzTab, charge, missed_cleavages, qvalue_threshold): """ Convert DIA-NN output to MSstats, Triqler or mzTab. The output formats are used for quality control and downstream analysis. @@ -138,16 +139,17 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_c logger.info(f"Triqler input file is saved as {exp_out_prefix}_triqler_in.tsv") del out_triqler - mztab_out = f"{Path(exp_design).stem}_out.mzTab" - # Convert to mzTab - diann_directory.convert_to_mztab( - report=report, - f_table=f_table, - charge=charge, - missed_cleavages=missed_cleavages, - dia_params=dia_params, - out=mztab_out, - ) + if not skip_mzTab: + mztab_out = f"{Path(exp_design).stem}_out.mzTab" + # Convert to mzTab + diann_directory.convert_to_mztab( + report=report, + f_table=f_table, + charge=charge, + missed_cleavages=missed_cleavages, + dia_params=dia_params, + out=mztab_out, + ) def _true_stem(x): diff --git a/modules/local/diannconvert/main.nf b/modules/local/diannconvert/main.nf index 23524138..9bed9fe8 100644 --- a/modules/local/diannconvert/main.nf +++ b/modules/local/diannconvert/main.nf @@ -22,7 +22,7 @@ process DIANNCONVERT { output: path "*msstats_in.csv", emit: out_msstats path "*triqler_in.tsv", emit: out_triqler - path "*.mzTab", emit: out_mztab + path "*.mzTab", emit: out_mztab optional true path "*.log", emit: log path "versions.yml", emit: version @@ -39,6 +39,7 @@ process DIANNCONVERT { --folder ./ \\ --exp_design ${exp_design} \\ --diann_version ./version/versions.yml \\ + --skip_mzTab $params.skip_raw_conversion \\ --dia_params "${dia_params}" \\ --charge $params.max_precursor_charge \\ --missed_cleavages $params.allowed_missed_cleavages \\ diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index 8b6ad192..aab63830 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -21,7 +21,7 @@ process PMULTIQC { script: def args = task.ext.args ?: '' - def disable_pmultiqc = (params.enable_pmultiqc) && (params.export_mztab) ? "" : "--disable_plugin" + def disable_pmultiqc = (params.enable_pmultiqc) && (params.export_mztab) && (!params.skip_raw_conversion) ? "" : "--disable_plugin" def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored_idxml" : "" diff --git a/nextflow.config b/nextflow.config index 22303377..a99a9820 100644 --- a/nextflow.config +++ b/nextflow.config @@ -189,6 +189,7 @@ params { species_genes = false diann_normalize = true diann_speclib = null + skip_raw_conversion = false // DIA-NN: Extras skip_preliminary_analysis = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 746e312c..210c9f8e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -175,6 +175,13 @@ "description": "Force initial re-indexing of input mzML files. Also fixes some common mistakes in slightly incomplete/outdated mzMLs. (Default: true for safety)", "fa_icon": "far fa-check-square", "help_text": "Force re-indexing in the beginning of the pipeline to make sure that indices are up-to-date and to avoid redundant indexing on-demand in steps that require an index (e.g., Comet)." + }, + "skip_raw_conversion": { + "type": "boolean", + "default": false, + "description": "Convert RAW files to mzML for DIA-NN workflow", + "fa_icon": "far fa-check-square", + "help_text": "Whether to convert RAW files to .mzML for DIA-NN workflow" } }, "fa_icon": "far fa-chart-bar" diff --git a/workflows/quantms.nf b/workflows/quantms.nf index bd620408..9ba8ba7a 100644 --- a/workflows/quantms.nf +++ b/workflows/quantms.nf @@ -61,13 +61,23 @@ workflow QUANTMS { // // SUBWORKFLOW: File preparation // - FILE_PREPARATION ( - CREATE_INPUT_CHANNEL.out.ch_meta_config_iso.mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_lfq).mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_dia) - ) + statistics = Channel.empty() + ch_raw_input = Channel.empty() + if (params.skip_raw_conversion == false) { + FILE_PREPARATION ( + CREATE_INPUT_CHANNEL.out.ch_meta_config_iso.mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_lfq).mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_dia) + ) + ch_versions = ch_versions.mix(FILE_PREPARATION.out.version.ifEmpty(null)) + statistics = FILE_PREPARATION.out.statistics + } else { + ch_raw_input = CREATE_INPUT_CHANNEL.out.ch_meta_config_dia + if (ch_raw_input.ifEmpty(true)) { + exit 1, 'Allows skipping of raw data conversions in the DIA workflow only!' + } + } - ch_versions = ch_versions.mix(FILE_PREPARATION.out.version.ifEmpty(null)) - FILE_PREPARATION.out.results + ch_raw_input.mix(FILE_PREPARATION.out.results) .branch { dia: it[0].acquisition_method.contains("dia") iso: it[0].labelling_type.contains("tmt") || it[0].labelling_type.contains("itraq") @@ -131,7 +141,7 @@ workflow QUANTMS { ch_msstats_in = ch_msstats_in.mix(LFQ.out.msstats_in) ch_versions = ch_versions.mix(LFQ.out.versions.ifEmpty(null)) - DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, FILE_PREPARATION.out.statistics) + DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, statistics) ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report) ch_msstats_in = ch_msstats_in.mix(DIA.out.msstats_in) ch_versions = ch_versions.mix(DIA.out.versions.ifEmpty(null)) @@ -161,7 +171,9 @@ workflow QUANTMS { ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics) + if (!params.skip_raw_conversion) { + ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics) + } ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) ch_multiqc_quantms_logo = file("$projectDir/assets/nf-core-quantms_logo_light.png") From cf24916f5022516fd47ce9e330778bff58bc64c6 Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 15 May 2024 11:02:16 +0800 Subject: [PATCH 2/3] Revert "allow skip raw conversion" This reverts commit 0a096b57273f15fe30fa745186ec3253ad94906e. --- bin/diann_convert.py | 24 +++++++++++------------- modules/local/diannconvert/main.nf | 3 +-- modules/local/pmultiqc/main.nf | 2 +- nextflow.config | 1 - nextflow_schema.json | 7 ------- workflows/quantms.nf | 26 +++++++------------------- 6 files changed, 20 insertions(+), 43 deletions(-) diff --git a/bin/diann_convert.py b/bin/diann_convert.py index f98b4a52..fb2bdd22 100755 --- a/bin/diann_convert.py +++ b/bin/diann_convert.py @@ -39,13 +39,12 @@ def cli(): @click.option("--folder", "-f") @click.option("--exp_design", "-d") @click.option("--diann_version", "-v") -@click.option("--skip_mzTab", "-s") @click.option("--dia_params", "-p") @click.option("--charge", "-c") @click.option("--missed_cleavages", "-m") @click.option("--qvalue_threshold", "-q", type=float) @click.pass_context -def convert(ctx, folder, exp_design, dia_params, diann_version, skip_mzTab, charge, missed_cleavages, qvalue_threshold): +def convert(ctx, folder, exp_design, dia_params, diann_version, charge, missed_cleavages, qvalue_threshold): """ Convert DIA-NN output to MSstats, Triqler or mzTab. The output formats are used for quality control and downstream analysis. @@ -139,17 +138,16 @@ def convert(ctx, folder, exp_design, dia_params, diann_version, skip_mzTab, char logger.info(f"Triqler input file is saved as {exp_out_prefix}_triqler_in.tsv") del out_triqler - if not skip_mzTab: - mztab_out = f"{Path(exp_design).stem}_out.mzTab" - # Convert to mzTab - diann_directory.convert_to_mztab( - report=report, - f_table=f_table, - charge=charge, - missed_cleavages=missed_cleavages, - dia_params=dia_params, - out=mztab_out, - ) + mztab_out = f"{Path(exp_design).stem}_out.mzTab" + # Convert to mzTab + diann_directory.convert_to_mztab( + report=report, + f_table=f_table, + charge=charge, + missed_cleavages=missed_cleavages, + dia_params=dia_params, + out=mztab_out, + ) def _true_stem(x): diff --git a/modules/local/diannconvert/main.nf b/modules/local/diannconvert/main.nf index 9bed9fe8..23524138 100644 --- a/modules/local/diannconvert/main.nf +++ b/modules/local/diannconvert/main.nf @@ -22,7 +22,7 @@ process DIANNCONVERT { output: path "*msstats_in.csv", emit: out_msstats path "*triqler_in.tsv", emit: out_triqler - path "*.mzTab", emit: out_mztab optional true + path "*.mzTab", emit: out_mztab path "*.log", emit: log path "versions.yml", emit: version @@ -39,7 +39,6 @@ process DIANNCONVERT { --folder ./ \\ --exp_design ${exp_design} \\ --diann_version ./version/versions.yml \\ - --skip_mzTab $params.skip_raw_conversion \\ --dia_params "${dia_params}" \\ --charge $params.max_precursor_charge \\ --missed_cleavages $params.allowed_missed_cleavages \\ diff --git a/modules/local/pmultiqc/main.nf b/modules/local/pmultiqc/main.nf index aab63830..8b6ad192 100644 --- a/modules/local/pmultiqc/main.nf +++ b/modules/local/pmultiqc/main.nf @@ -21,7 +21,7 @@ process PMULTIQC { script: def args = task.ext.args ?: '' - def disable_pmultiqc = (params.enable_pmultiqc) && (params.export_mztab) && (!params.skip_raw_conversion) ? "" : "--disable_plugin" + def disable_pmultiqc = (params.enable_pmultiqc) && (params.export_mztab) ? "" : "--disable_plugin" def disable_table_plots = (params.enable_pmultiqc) && (params.skip_table_plots) ? "--disable_table" : "" def disable_idxml_index = (params.enable_pmultiqc) && (params.pmultiqc_idxml_skip) ? "--ignored_idxml" : "" diff --git a/nextflow.config b/nextflow.config index a99a9820..22303377 100644 --- a/nextflow.config +++ b/nextflow.config @@ -189,7 +189,6 @@ params { species_genes = false diann_normalize = true diann_speclib = null - skip_raw_conversion = false // DIA-NN: Extras skip_preliminary_analysis = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 210c9f8e..746e312c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -175,13 +175,6 @@ "description": "Force initial re-indexing of input mzML files. Also fixes some common mistakes in slightly incomplete/outdated mzMLs. (Default: true for safety)", "fa_icon": "far fa-check-square", "help_text": "Force re-indexing in the beginning of the pipeline to make sure that indices are up-to-date and to avoid redundant indexing on-demand in steps that require an index (e.g., Comet)." - }, - "skip_raw_conversion": { - "type": "boolean", - "default": false, - "description": "Convert RAW files to mzML for DIA-NN workflow", - "fa_icon": "far fa-check-square", - "help_text": "Whether to convert RAW files to .mzML for DIA-NN workflow" } }, "fa_icon": "far fa-chart-bar" diff --git a/workflows/quantms.nf b/workflows/quantms.nf index 9ba8ba7a..bd620408 100644 --- a/workflows/quantms.nf +++ b/workflows/quantms.nf @@ -61,23 +61,13 @@ workflow QUANTMS { // // SUBWORKFLOW: File preparation // - statistics = Channel.empty() - ch_raw_input = Channel.empty() - if (params.skip_raw_conversion == false) { - FILE_PREPARATION ( - CREATE_INPUT_CHANNEL.out.ch_meta_config_iso.mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_lfq).mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_dia) - ) - ch_versions = ch_versions.mix(FILE_PREPARATION.out.version.ifEmpty(null)) - statistics = FILE_PREPARATION.out.statistics - } else { - ch_raw_input = CREATE_INPUT_CHANNEL.out.ch_meta_config_dia - if (ch_raw_input.ifEmpty(true)) { - exit 1, 'Allows skipping of raw data conversions in the DIA workflow only!' - } - } + FILE_PREPARATION ( + CREATE_INPUT_CHANNEL.out.ch_meta_config_iso.mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_lfq).mix(CREATE_INPUT_CHANNEL.out.ch_meta_config_dia) + ) + ch_versions = ch_versions.mix(FILE_PREPARATION.out.version.ifEmpty(null)) - ch_raw_input.mix(FILE_PREPARATION.out.results) + FILE_PREPARATION.out.results .branch { dia: it[0].acquisition_method.contains("dia") iso: it[0].labelling_type.contains("tmt") || it[0].labelling_type.contains("itraq") @@ -141,7 +131,7 @@ workflow QUANTMS { ch_msstats_in = ch_msstats_in.mix(LFQ.out.msstats_in) ch_versions = ch_versions.mix(LFQ.out.versions.ifEmpty(null)) - DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, statistics) + DIA(ch_fileprep_result.dia, CREATE_INPUT_CHANNEL.out.ch_expdesign, FILE_PREPARATION.out.statistics) ch_pipeline_results = ch_pipeline_results.mix(DIA.out.diann_report) ch_msstats_in = ch_msstats_in.mix(DIA.out.msstats_in) ch_versions = ch_versions.mix(DIA.out.versions.ifEmpty(null)) @@ -171,9 +161,7 @@ workflow QUANTMS { ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_config) ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - if (!params.skip_raw_conversion) { - ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics) - } + ch_multiqc_files = ch_multiqc_files.mix(FILE_PREPARATION.out.statistics) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) ch_multiqc_quantms_logo = file("$projectDir/assets/nf-core-quantms_logo_light.png") From cbb8edd969a3656d7b5fed63b1308442fc012a7d Mon Sep 17 00:00:00 2001 From: Chengxin Dai <37200167+daichengxin@users.noreply.github.com> Date: Wed, 15 May 2024 11:04:45 +0800 Subject: [PATCH 3/3] add msgfdb_indexing --- .../openms/thirdparty/msgfdb_indexing/main.nf | 36 +++++++++++++++++++ .../thirdparty/msgfdb_indexing/meta.yml | 32 +++++++++++++++++ .../thirdparty/searchenginemsgf/main.nf | 2 +- subworkflows/local/databasesearchengines.nf | 4 ++- 4 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 modules/local/openms/thirdparty/msgfdb_indexing/main.nf create mode 100644 modules/local/openms/thirdparty/msgfdb_indexing/meta.yml diff --git a/modules/local/openms/thirdparty/msgfdb_indexing/main.nf b/modules/local/openms/thirdparty/msgfdb_indexing/main.nf new file mode 100644 index 00000000..c9e206ec --- /dev/null +++ b/modules/local/openms/thirdparty/msgfdb_indexing/main.nf @@ -0,0 +1,36 @@ +process MSGFDBINDEXING { + tag "$database.baseName" + label 'process_low' + label 'openms' + + conda "bioconda::openms-thirdparty=3.1.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/openms-thirdparty:3.1.0--h9ee0642_1' : + 'biocontainers/openms-thirdparty:3.1.0--h9ee0642_1' }" + + input: + path(database) + + output: + tuple path("${database.baseName}.cnlcp"), path("${database.baseName}.canno"), path("${database.baseName}.csarr"), path("${database.baseName}.cseq"), emit: msgfdb_idx + path "versions.yml", emit: version + path "*.log", emit: log + + script: + def args = task.ext.args ?: '' + + """ + msgf_plus edu.ucsd.msjava.msdbsearch.BuildSA \\ + -d ${database} \\ + -o ./ \\ + -tda 0 \\ + -debug $params.db_debug \\ + $args \\ + 2>&1 | tee ${database.baseName}_msgfdb_idx.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + msgf_plus: \$(msgf_plus 2>&1 | grep -E '^MS-GF\\+ Release.*') + END_VERSIONS + """ +} diff --git a/modules/local/openms/thirdparty/msgfdb_indexing/meta.yml b/modules/local/openms/thirdparty/msgfdb_indexing/meta.yml new file mode 100644 index 00000000..8919e89e --- /dev/null +++ b/modules/local/openms/thirdparty/msgfdb_indexing/meta.yml @@ -0,0 +1,32 @@ +name: msgfdb_indexing +description: Indexing the database prior to the database search for MS-GF+. +keywords: + - Indexing + - database + - msgf +tools: + - msgf_plus: + description: | + The MS-GF+ protein identification (database search) engine. + homepage: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_MSGFPlusAdapter.html + documentation: https://abibuilder.cs.uni-tuebingen.de/archive/openms/Documentation/nightly/html/TOPP_MSGFPlusAdapter.html +input: + - database: + type: file + description: fasta file + pattern: "*.{fasta,fa}" +output: + - msgfdb_idx: + type: file + description: Database indexing files + pattern: "*.{cnlcp,canno,csarr,cseq}" + - log: + type: file + description: log file + pattern: "*.log" + - version: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@daichengxin" diff --git a/modules/local/openms/thirdparty/searchenginemsgf/main.nf b/modules/local/openms/thirdparty/searchenginemsgf/main.nf index b76b1106..4735b043 100644 --- a/modules/local/openms/thirdparty/searchenginemsgf/main.nf +++ b/modules/local/openms/thirdparty/searchenginemsgf/main.nf @@ -9,7 +9,7 @@ process SEARCHENGINEMSGF { 'biocontainers/openms-thirdparty:3.1.0--h9ee0642_1' }" input: - tuple val(meta), path(mzml_file), path(database) + tuple val(meta), path(mzml_file), path(database), path(cnlcp), path(canno), path(csarr), path(cseq) output: tuple val(meta), path("${mzml_file.baseName}_msgf.idXML"), emit: id_files_msgf diff --git a/subworkflows/local/databasesearchengines.nf b/subworkflows/local/databasesearchengines.nf index e0d62bfc..0c1209c6 100644 --- a/subworkflows/local/databasesearchengines.nf +++ b/subworkflows/local/databasesearchengines.nf @@ -5,6 +5,7 @@ include { SEARCHENGINEMSGF } from '../../modules/local/openms/thirdparty/searchenginemsgf/main' include { SEARCHENGINECOMET } from '../../modules/local/openms/thirdparty/searchenginecomet/main' include { SEARCHENGINESAGE } from '../../modules/local/openms/thirdparty/searchenginesage/main' +include { MSGFDBINDEXING } from '../../modules/local/openms/thirdparty/msgfdb_indexing/main' workflow DATABASESEARCHENGINES { take: @@ -15,7 +16,8 @@ workflow DATABASESEARCHENGINES { (ch_id_msgf, ch_id_comet, ch_id_sage, ch_versions) = [ Channel.empty(), Channel.empty(), Channel.empty(), Channel.empty() ] if (params.search_engines.contains("msgf")) { - SEARCHENGINEMSGF(ch_mzmls_search.combine(ch_searchengine_in_db)) + MSGFDBINDEXING(ch_searchengine_in_db) + SEARCHENGINEMSGF(ch_mzmls_search.combine(ch_searchengine_in_db).combine(MSGFDBINDEXING.out.msgfdb_idx)) ch_versions = ch_versions.mix(SEARCHENGINEMSGF.out.version) ch_id_msgf = ch_id_msgf.mix(SEARCHENGINEMSGF.out.id_files_msgf) }