From 622be5e323d630dc45df02ccdc4d53cf298c7058 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 7 Oct 2024 16:42:14 +0000 Subject: [PATCH 01/13] Initial commit for TheiaEuk ONT --- .dockstore.yml | 5 ++ workflows/theiaeuk/wf_theiaeuk_ont.wdl | 108 ++++++++++++++++++++++++ workflows/utilities/wf_merlin_magic.wdl | 56 ++++++++---- 3 files changed, 153 insertions(+), 16 deletions(-) create mode 100644 workflows/theiaeuk/wf_theiaeuk_ont.wdl diff --git a/.dockstore.yml b/.dockstore.yml index 5306d30ed..21f1b57ce 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -5,6 +5,11 @@ workflows: primaryDescriptorPath: /workflows/theiaeuk/wf_theiaeuk_illumina_pe.wdl testParameterFiles: - /tests/inputs/empty.json + - name: TheiaEuk_ONT_PHB + subclass: WDL + primaryDescriptorPath: /workflows/theiaeuk/wf_theiaeuk_ont.wdl + testParameterFiles: + - /tests/inputs/empty.json - name: Cauris_CladeTyper_PHB subclass: WDL primaryDescriptorPath: /workflows/theiaeuk/wf_cauris_cladetyper.wdl diff --git a/workflows/theiaeuk/wf_theiaeuk_ont.wdl b/workflows/theiaeuk/wf_theiaeuk_ont.wdl new file mode 100644 index 000000000..2d9118962 --- /dev/null +++ b/workflows/theiaeuk/wf_theiaeuk_ont.wdl @@ -0,0 +1,108 @@ +version 1.0 + +import "../utilities/wf_read_QC_trim_ont.wdl" as read_qc +import "../../tasks/assembly/task_dragonflye.wdl" as dragonflye +import "../../tasks/taxon_id/task_gambit.wdl" as gambit +import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow + +workflow theiaeuk_ont { + input { + File read1 + String samplename + Int genome_length = 50000000 + String workflow_series = "theiaeuk" + String? assembler + String? 
assembler_options + Int dragonflye_cpu = 8 + Int dragonflye_memory = 32 + Int dragonflye_disk_size = 100 + String medaka_model = "r941_min_hac_g507" + File gambit_db_genomes = "gs://theiagen-public-files-rp/terra/theiaeuk-files/gambit/221130-theiagen-fungal-v0.2.db" + File gambit_db_signatures = "gs://theiagen-public-files-rp/terra/theiaeuk-files/gambit/221130-theiagen-fungal-v0.2.h5" + Boolean assembly_only = true + Boolean ont_data = true + } + + call read_qc.read_QC_trim_ont as read_qc { + input: + read1 = read1, + samplename = samplename, + genome_length = genome_length, + workflow_series = workflow_series + } + + call dragonflye.dragonflye { + input: + read1 = read_qc.read1_clean, + samplename = samplename, + assembler = assembler, + assembler_options = assembler_options, + genome_length = "${genome_length}", + medaka_model = medaka_model, + cpu = dragonflye_cpu, + memory = dragonflye_memory, + disk_size = dragonflye_disk_size + } + + call gambit.gambit { + input: + assembly = dragonflye.assembly_fasta, + samplename = samplename, + gambit_db_genomes = gambit_db_genomes, + gambit_db_signatures = gambit_db_signatures + } + + call merlin_magic_workflow.merlin_magic { + input: + samplename = samplename, + merlin_tag = gambit.merlin_tag, + assembly = dragonflye.assembly_fasta, + read1 = read_qc.read1_clean, + assembly_only = assembly_only, + ont_data = ont_data, + theiaeuk = true + } + + output { + # Read QC outputs + File read1_clean = read_qc.read1_clean + String? 
nanoq_version = read_qc.nanoq_version + Int est_genome_length = read_qc.est_genome_length + # Assembly outputs + File assembly_fasta = dragonflye.assembly_fasta + File contigs_gfa = dragonflye.contigs_gfa + String dragonflye_version = dragonflye.dragonflye_version + # Gambit outputs + File gambit_report_file = gambit.gambit_report_file + File gambit_closest_genomes_file = gambit.gambit_closest_genomes_file + String gambit_predicted_taxon = gambit.gambit_predicted_taxon + String gambit_predicted_taxon_rank = gambit.gambit_predicted_taxon_rank + String gambit_next_taxon = gambit.gambit_next_taxon + String gambit_next_taxon_rank = gambit.gambit_next_taxon_rank + String gambit_version = gambit.gambit_version + String gambit_db_version = gambit.gambit_db_version + String merlin_tag = gambit.merlin_tag + String gambit_docker = gambit.gambit_docker + # C. auris specific outputs + String? clade_type = merlin_magic.clade_type + String? cladetyper_analysis_date = merlin_magic.cladetyper_analysis_date + String? cladetyper_version = merlin_magic.cladetyper_version + String? cladetyper_docker_image = merlin_magic.cladetyper_docker_image + String? cladetype_annotated_ref = merlin_magic.cladetype_annotated_ref + # Snippy variants outputs + String? snippy_variants_version = merlin_magic.snippy_variants_version + String? snippy_variants_query = merlin_magic.snippy_variants_query + String? snippy_variants_query_check = merlin_magic.snippy_variants_query_check + String? snippy_variants_hits = merlin_magic.snippy_variants_hits + String? snippy_variants_gene_query_results = merlin_magic.snippy_variants_gene_query_results + String? snippy_variants_outdir_tarball = merlin_magic.snippy_variants_outdir_tarball + String? snippy_variants_results = merlin_magic.snippy_variants_results + String? snippy_variants_bam = merlin_magic.snippy_variants_bam + String? snippy_variants_bai = merlin_magic.snippy_variants_bai + String? 
snippy_variants_summary = merlin_magic.snippy_variants_summary + String? snippy_variants_num_reads_aligned = merlin_magic.snippy_variants_num_reads_aligned + String? snippy_variants_coverage_tsv = merlin_magic.snippy_variants_coverage_tsv + String? snippy_variants_num_variants = merlin_magic.snippy_variants_num_variants + String? snippy_variants_percent_ref_coverage = merlin_magic.snippy_variants_percent_ref_coverage + } +} \ No newline at end of file diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index 690086a60..c59aa16c2 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -612,7 +612,7 @@ workflow merlin_magic { docker = cauris_cladetyper_docker_image } if (!assembly_only && !ont_data) { - call snippy.snippy_variants as snippy_cauris { # no ONT support right now + call snippy.snippy_variants as snippy_cauris { input: reference_genome_file = cladetyper.clade_spec_ref, read1 = select_first([read1]), @@ -635,6 +635,30 @@ workflow merlin_magic { docker = snippy_gene_query_docker_image } } + + if (assembly_only && ont_data) { + call snippy.snippy_variants as snippy_cauris_ont { + input: + reference_genome_file = cladetyper.clade_spec_ref, + assembly_fasta = assembly, + samplename = samplename, + map_qual = snippy_map_qual, + base_quality = snippy_base_quality, + min_coverage = snippy_min_coverage, + min_frac = snippy_min_frac, + min_quality = snippy_min_quality, + maxsoft = snippy_maxsoft, + docker = snippy_variants_docker_image + } + call snippy_gene_query.snippy_gene_query as snippy_gene_query_cauris_ont { + input: + samplename = samplename, + snippy_variants_results = snippy_cauris_ont.snippy_variants_results, + reference = cladetyper.clade_spec_ref, + query_gene = select_first([snippy_query_gene, "FKS1,ERG11,FUR1"]), + docker = snippy_gene_query_docker_image + } + } } # Removing C.albicans subworkflow for now as current workflows not designed for diploid assembly # 
if (merlin_tag == "Candida albicans") { @@ -963,20 +987,20 @@ workflow merlin_magic { String? cladetyper_docker_image = cladetyper.gambit_cladetyper_docker_image String? cladetype_annotated_ref = cladetyper.clade_spec_ref # snippy variants - String snippy_variants_reference_genome = select_first([snippy_cauris.snippy_variants_reference_genome, snippy_afumigatus.snippy_variants_reference_genome, snippy_crypto.snippy_variants_reference_genome, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_version = select_first([snippy_cauris.snippy_variants_version, snippy_afumigatus.snippy_variants_version, snippy_crypto.snippy_variants_version, "No matching taxon detected"]) - String snippy_variants_query = select_first([snippy_gene_query_cauris.snippy_variants_query, snippy_gene_query_afumigatus.snippy_variants_query, snippy_gene_query_crypto.snippy_variants_query, "No matching taxon detected"]) - String snippy_variants_query_check = select_first([snippy_gene_query_cauris.snippy_variants_query_check, snippy_gene_query_afumigatus.snippy_variants_query_check, snippy_gene_query_crypto.snippy_variants_query_check, "No matching taxon detected"]) - String snippy_variants_hits = select_first([snippy_gene_query_cauris.snippy_variants_hits, snippy_gene_query_afumigatus.snippy_variants_hits, snippy_gene_query_crypto.snippy_variants_hits, "No matching taxon detected"]) - String snippy_variants_gene_query_results = select_first([snippy_gene_query_cauris.snippy_variants_gene_query_results, snippy_gene_query_afumigatus.snippy_variants_gene_query_results, snippy_gene_query_crypto.snippy_variants_gene_query_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_outdir_tarball = select_first([snippy_cauris.snippy_variants_outdir_tarball, snippy_afumigatus.snippy_variants_outdir_tarball, snippy_crypto.snippy_variants_outdir_tarball, 
"gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_results = select_first([snippy_cauris.snippy_variants_results, snippy_afumigatus.snippy_variants_results, snippy_crypto.snippy_variants_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_bam = select_first([snippy_cauris.snippy_variants_bam, snippy_afumigatus.snippy_variants_bam, snippy_crypto.snippy_variants_bam, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_bai = select_first([snippy_cauris.snippy_variants_bai, snippy_afumigatus.snippy_variants_bai, snippy_crypto.snippy_variants_bai, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_summary = select_first([snippy_cauris.snippy_variants_summary, snippy_afumigatus.snippy_variants_summary, snippy_crypto.snippy_variants_summary, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_num_reads_aligned = select_first([snippy_cauris.snippy_variants_num_reads_aligned, snippy_afumigatus.snippy_variants_num_reads_aligned, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) - String snippy_variants_coverage_tsv = select_first([snippy_cauris.snippy_variants_coverage_tsv, snippy_afumigatus.snippy_variants_coverage_tsv, snippy_crypto.snippy_variants_coverage_tsv, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_num_variants = select_first([snippy_cauris.snippy_variants_num_variants, snippy_afumigatus.snippy_variants_num_variants, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) - String snippy_variants_percent_ref_coverage = select_first([snippy_cauris.snippy_variants_percent_ref_coverage, snippy_afumigatus.snippy_variants_percent_ref_coverage, snippy_crypto.snippy_variants_percent_ref_coverage, "No matching taxon 
detected"]) + String snippy_variants_reference_genome = select_first([snippy_cauris.snippy_variants_reference_genome, snippy_cauris_ont.snippy_variants_reference_genome, snippy_afumigatus.snippy_variants_reference_genome, snippy_crypto.snippy_variants_reference_genome, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_version = select_first([snippy_cauris.snippy_variants_version, snippy_cauris_ont.snippy_variants_version,snippy_afumigatus.snippy_variants_version, snippy_crypto.snippy_variants_version, "No matching taxon detected"]) + String snippy_variants_query = select_first([snippy_gene_query_cauris.snippy_variants_query, snippy_gene_query_cauris_ont.snippy_variants_query, snippy_gene_query_afumigatus.snippy_variants_query, snippy_gene_query_crypto.snippy_variants_query, "No matching taxon detected"]) + String snippy_variants_query_check = select_first([snippy_gene_query_cauris.snippy_variants_query_check, snippy_gene_query_cauris_ont.snippy_variants_query_check, snippy_gene_query_afumigatus.snippy_variants_query_check, snippy_gene_query_crypto.snippy_variants_query_check, "No matching taxon detected"]) + String snippy_variants_hits = select_first([snippy_gene_query_cauris.snippy_variants_hits, snippy_gene_query_cauris_ont.snippy_variants_hits, snippy_gene_query_afumigatus.snippy_variants_hits, snippy_gene_query_crypto.snippy_variants_hits, "No matching taxon detected"]) + String snippy_variants_gene_query_results = select_first([snippy_gene_query_cauris.snippy_variants_gene_query_results, snippy_gene_query_cauris_ont.snippy_variants_gene_query_results, snippy_gene_query_afumigatus.snippy_variants_gene_query_results, snippy_gene_query_crypto.snippy_variants_gene_query_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_outdir_tarball = select_first([snippy_cauris.snippy_variants_outdir_tarball, snippy_cauris_ont.snippy_variants_outdir_tarball, 
snippy_afumigatus.snippy_variants_outdir_tarball, snippy_crypto.snippy_variants_outdir_tarball, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_results = select_first([snippy_cauris.snippy_variants_results, snippy_cauris_ont.snippy_variants_results,snippy_afumigatus.snippy_variants_results, snippy_crypto.snippy_variants_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_bam = select_first([snippy_cauris.snippy_variants_bam, snippy_cauris_ont.snippy_variants_bam, snippy_afumigatus.snippy_variants_bam, snippy_crypto.snippy_variants_bam, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_bai = select_first([snippy_cauris.snippy_variants_bai, snippy_cauris_ont.snippy_variants_bai, snippy_afumigatus.snippy_variants_bai, snippy_crypto.snippy_variants_bai, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_summary = select_first([snippy_cauris.snippy_variants_summary, snippy_cauris_ont.snippy_variants_summary, snippy_afumigatus.snippy_variants_summary, snippy_crypto.snippy_variants_summary, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_num_reads_aligned = select_first([snippy_cauris.snippy_variants_num_reads_aligned, snippy_cauris_ont.snippy_variants_num_reads_aligned, snippy_afumigatus.snippy_variants_num_reads_aligned, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) + String snippy_variants_coverage_tsv = select_first([snippy_cauris.snippy_variants_coverage_tsv, snippy_cauris_ont.snippy_variants_coverage_tsv, snippy_afumigatus.snippy_variants_coverage_tsv, snippy_crypto.snippy_variants_coverage_tsv, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_num_variants = select_first([snippy_cauris.snippy_variants_num_variants, 
snippy_cauris_ont.snippy_variants_num_variants, snippy_afumigatus.snippy_variants_num_variants, snippy_crypto.snippy_variants_num_variants, "No matching taxon detected"]) + String snippy_variants_percent_ref_coverage = select_first([snippy_cauris.snippy_variants_percent_ref_coverage, snippy_cauris_ont.snippy_variants_percent_ref_coverage, snippy_afumigatus.snippy_variants_percent_ref_coverage, snippy_crypto.snippy_variants_percent_ref_coverage, "No matching taxon detected"]) } } \ No newline at end of file From 821b9903e439e27696c3df04789c0ea408f4632a Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 7 Oct 2024 17:05:35 +0000 Subject: [PATCH 02/13] Add specifics for description of cladetyper --- workflows/theiaeuk/wf_theiaeuk_ont.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/theiaeuk/wf_theiaeuk_ont.wdl b/workflows/theiaeuk/wf_theiaeuk_ont.wdl index 2d9118962..4d2079c28 100644 --- a/workflows/theiaeuk/wf_theiaeuk_ont.wdl +++ b/workflows/theiaeuk/wf_theiaeuk_ont.wdl @@ -83,7 +83,7 @@ workflow theiaeuk_ont { String gambit_db_version = gambit.gambit_db_version String merlin_tag = gambit.merlin_tag String gambit_docker = gambit.gambit_docker - # C. auris specific outputs + # C. auris specific outputs for cladetyper String? clade_type = merlin_magic.clade_type String? cladetyper_analysis_date = merlin_magic.cladetyper_analysis_date String? 
cladetyper_version = merlin_magic.cladetyper_version From 0dbc7581df8d555c1a8f07ff22950a9afc4e4f23 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 7 Oct 2024 17:36:51 +0000 Subject: [PATCH 03/13] Added theiaeuk specific workflow_series logic to qc trim ont --- workflows/utilities/wf_read_QC_trim_ont.wdl | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/workflows/utilities/wf_read_QC_trim_ont.wdl b/workflows/utilities/wf_read_QC_trim_ont.wdl index c03141251..3a5ac6058 100644 --- a/workflows/utilities/wf_read_QC_trim_ont.wdl +++ b/workflows/utilities/wf_read_QC_trim_ont.wdl @@ -121,6 +121,24 @@ workflow read_QC_trim_ont { samplename = samplename } } + + if (workflow_series == "theiaeuk") { + # rasusa for random downsampling + call rasusa_task.rasusa as theiaeuk_rasusa { + input: + read1 = read1, + samplename = samplename, + coverage = downsampling_coverage, + genome_length = genome_length + } + + # nanoq for filtering + call nanoq_task.nanoq as theiaeuk_nanoq { + input: + read1 = theiaeuk_rasusa.read1_subsampled, + samplename = samplename + } + } output { # theiacov outputs # ncbi scrub outputs @@ -144,7 +162,7 @@ workflow read_QC_trim_ont { Int est_genome_length = genome_length # nanoq outputs - File read1_clean = select_first([nanoq.filtered_read1, read_filtering.read1_clean]) + File read1_clean = select_first([nanoq.filtered_read1, read_filtering.read1_clean, theiaeuk_nanoq.filtered_read1]) String? 
nanoq_version = nanoq.version # rasusa outputs From 63dcad3595f679bc6829243995bb850b349b6d11 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Mon, 7 Oct 2024 19:39:22 +0000 Subject: [PATCH 04/13] Added versioning capture --- workflows/theiaeuk/wf_theiaeuk_ont.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/workflows/theiaeuk/wf_theiaeuk_ont.wdl b/workflows/theiaeuk/wf_theiaeuk_ont.wdl index 4d2079c28..f59e6cd30 100644 --- a/workflows/theiaeuk/wf_theiaeuk_ont.wdl +++ b/workflows/theiaeuk/wf_theiaeuk_ont.wdl @@ -4,6 +4,7 @@ import "../utilities/wf_read_QC_trim_ont.wdl" as read_qc import "../../tasks/assembly/task_dragonflye.wdl" as dragonflye import "../../tasks/taxon_id/task_gambit.wdl" as gambit import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow +import "../../tasks/task_versioning.wdl" as versioning workflow theiaeuk_ont { input { @@ -37,7 +38,7 @@ workflow theiaeuk_ont { samplename = samplename, assembler = assembler, assembler_options = assembler_options, - genome_length = "${genome_length}", + genome_length = genome_length, medaka_model = medaka_model, cpu = dragonflye_cpu, memory = dragonflye_memory, @@ -63,7 +64,14 @@ workflow theiaeuk_ont { theiaeuk = true } + call versioning.version_capture { + input: + } + output { + # Version Capture + String theiaeuk_ont_version = version_capture.phb_version + String theiaeuk_ont_analysis_date = version_capture.date # Read QC outputs File read1_clean = read_qc.read1_clean String? 
nanoq_version = read_qc.nanoq_version From 6a45737190e24b34af9a5af2302e7c8cc3617786 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 8 Oct 2024 12:36:13 +0000 Subject: [PATCH 05/13] Reordered logic for rasusa and nanoq --- workflows/utilities/wf_read_QC_trim_ont.wdl | 73 +++++++++------------ 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/workflows/utilities/wf_read_QC_trim_ont.wdl b/workflows/utilities/wf_read_QC_trim_ont.wdl index 3a5ac6058..00bddc0ac 100644 --- a/workflows/utilities/wf_read_QC_trim_ont.wdl +++ b/workflows/utilities/wf_read_QC_trim_ont.wdl @@ -79,27 +79,8 @@ workflow read_QC_trim_ont { target_organism = target_organism } } - if ("~{workflow_series}" == "theiaprok") { - if ((call_kraken) && defined(kraken_db)) { - call kraken2.kraken2_standalone as kraken2_se { - input: - samplename = samplename, - read1 = read1, - kraken2_db = select_first([kraken_db]), - disk_size = kraken_disk_size, - memory = kraken_memory, - cpu = kraken_cpu - } - call kraken2.kraken2_parse_classified as kraken2_recalculate_abundances { - input: - samplename = samplename, - kraken2_report = kraken2_se.kraken2_report, - kraken2_classified_report = kraken2_se.kraken2_classified_report - } - } if ((call_kraken) && ! 
defined(kraken_db)) { - String kraken_db_warning = "Kraken database not defined" - } + if ("~{workflow_series}" == "theiaprok" || "~{workflow_series}" == "theiaeuk") { # rasusa for random downsampling call rasusa_task.rasusa { input: @@ -108,35 +89,41 @@ workflow read_QC_trim_ont { coverage = downsampling_coverage, genome_length = genome_length } - # tiptoft for plasmid detection - call tiptoft_task.tiptoft { - input: - read1 = read1, - samplename = samplename - } - # nanoq/filtlong (default min length 500) + + # nanoq for filtering call nanoq_task.nanoq { input: read1 = rasusa.read1_subsampled, samplename = samplename } - } - if (workflow_series == "theiaeuk") { - # rasusa for random downsampling - call rasusa_task.rasusa as theiaeuk_rasusa { - input: - read1 = read1, - samplename = samplename, - coverage = downsampling_coverage, - genome_length = genome_length - } + if ("~{workflow_series}" == "theiaprok") { + # tiptoft for plasmid detection + call tiptoft_task.tiptoft { + input: + read1 = read1, + samplename = samplename + } - # nanoq for filtering - call nanoq_task.nanoq as theiaeuk_nanoq { - input: - read1 = theiaeuk_rasusa.read1_subsampled, - samplename = samplename + if (call_kraken && defined(kraken_db)) { + call kraken2.kraken2_standalone as kraken2_se { + input: + samplename = samplename, + read1 = read1, + kraken2_db = select_first([kraken_db]), + disk_size = kraken_disk_size, + memory = kraken_memory, + cpu = kraken_cpu + } + call kraken2.kraken2_parse_classified as kraken2_recalculate_abundances { + input: + samplename = samplename, + kraken2_report = kraken2_se.kraken2_report, + kraken2_classified_report = kraken2_se.kraken2_classified_report + } + } if ((call_kraken) && ! 
defined(kraken_db)) { + String kraken_db_warning = "Kraken database not defined" + } } } output { @@ -162,7 +149,7 @@ workflow read_QC_trim_ont { Int est_genome_length = genome_length # nanoq outputs - File read1_clean = select_first([nanoq.filtered_read1, read_filtering.read1_clean, theiaeuk_nanoq.filtered_read1]) + File read1_clean = select_first([nanoq.filtered_read1, read_filtering.read1_clean]) String? nanoq_version = nanoq.version # rasusa outputs From af723142d3db67c4e838f95c31193a28b2346906 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 8 Oct 2024 12:38:03 +0000 Subject: [PATCH 06/13] Removed options for merlin magic that should be hidden --- workflows/theiaeuk/wf_theiaeuk_ont.wdl | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/workflows/theiaeuk/wf_theiaeuk_ont.wdl b/workflows/theiaeuk/wf_theiaeuk_ont.wdl index f59e6cd30..224473b88 100644 --- a/workflows/theiaeuk/wf_theiaeuk_ont.wdl +++ b/workflows/theiaeuk/wf_theiaeuk_ont.wdl @@ -20,8 +20,6 @@ workflow theiaeuk_ont { String medaka_model = "r941_min_hac_g507" File gambit_db_genomes = "gs://theiagen-public-files-rp/terra/theiaeuk-files/gambit/221130-theiagen-fungal-v0.2.db" File gambit_db_signatures = "gs://theiagen-public-files-rp/terra/theiaeuk-files/gambit/221130-theiagen-fungal-v0.2.h5" - Boolean assembly_only = true - Boolean ont_data = true } call read_qc.read_QC_trim_ont as read_qc { @@ -58,9 +56,8 @@ workflow theiaeuk_ont { samplename = samplename, merlin_tag = gambit.merlin_tag, assembly = dragonflye.assembly_fasta, - read1 = read_qc.read1_clean, - assembly_only = assembly_only, - ont_data = ont_data, + assembly_only = true, + ont_data = true, theiaeuk = true } From 4bd515d79772424b9f3fc43e7e4e5e51134f7571 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 8 Oct 2024 13:39:59 +0000 Subject: [PATCH 07/13] Reorder logic for snippy variant if tag is C.Auris --- workflows/utilities/wf_merlin_magic.wdl | 94 ++++++++++++------------- 1 file changed, 45 
insertions(+), 49 deletions(-) diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index c59aa16c2..f010353f3 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -612,54 +612,50 @@ workflow merlin_magic { docker = cauris_cladetyper_docker_image } if (!assembly_only && !ont_data) { - call snippy.snippy_variants as snippy_cauris { - input: - reference_genome_file = cladetyper.clade_spec_ref, - read1 = select_first([read1]), - read2 = read2, - samplename = samplename, - map_qual = snippy_map_qual, - base_quality = snippy_base_quality, - min_coverage = snippy_min_coverage, - min_frac = snippy_min_frac, - min_quality = snippy_min_quality, - maxsoft = snippy_maxsoft, - docker = snippy_variants_docker_image - } - call snippy_gene_query.snippy_gene_query as snippy_gene_query_cauris { - input: - samplename = samplename, - snippy_variants_results = snippy_cauris.snippy_variants_results, - reference = cladetyper.clade_spec_ref, - query_gene = select_first([snippy_query_gene, "FKS1,lanosterol.14-alpha.demethylase,uracil.phosphoribosyltransferase"]), - docker = snippy_gene_query_docker_image - } + call snippy.snippy_variants as snippy_cauris { + input: + reference_genome_file = cladetyper.clade_spec_ref, + read1 = select_first([read1]), + read2 = read2, + samplename = samplename, + map_qual = snippy_map_qual, + base_quality = snippy_base_quality, + min_coverage = snippy_min_coverage, + min_frac = snippy_min_frac, + min_quality = snippy_min_quality, + maxsoft = snippy_maxsoft, + docker = snippy_variants_docker_image } - - if (assembly_only && ont_data) { - call snippy.snippy_variants as snippy_cauris_ont { - input: - reference_genome_file = cladetyper.clade_spec_ref, - assembly_fasta = assembly, - samplename = samplename, - map_qual = snippy_map_qual, - base_quality = snippy_base_quality, - min_coverage = snippy_min_coverage, - min_frac = snippy_min_frac, - min_quality = 
snippy_min_quality, - maxsoft = snippy_maxsoft, - docker = snippy_variants_docker_image - } - call snippy_gene_query.snippy_gene_query as snippy_gene_query_cauris_ont { - input: - samplename = samplename, - snippy_variants_results = snippy_cauris_ont.snippy_variants_results, - reference = cladetyper.clade_spec_ref, - query_gene = select_first([snippy_query_gene, "FKS1,ERG11,FUR1"]), - docker = snippy_gene_query_docker_image - } + } + + if (assembly_only && ont_data) { + call snippy.snippy_variants as snippy_cauris_ont { + input: + reference_genome_file = cladetyper.clade_spec_ref, + assembly_fasta = assembly, + samplename = samplename, + map_qual = snippy_map_qual, + base_quality = snippy_base_quality, + min_coverage = snippy_min_coverage, + min_frac = snippy_min_frac, + min_quality = snippy_min_quality, + maxsoft = snippy_maxsoft, + docker = snippy_variants_docker_image } } + call snippy_gene_query.snippy_gene_query as snippy_gene_query_cauris { + input: + samplename = samplename, + snippy_variants_results = select_first([snippy_cauris.snippy_variants_results, snippy_cauris_ont.snippy_variants_results]), + reference = cladetyper.clade_spec_ref, + query_gene = select_first([ + snippy_query_gene, + if (assembly_only && ont_data) then "FKS1,ERG11,FUR1" + else "FKS1,lanosterol.14-alpha.demethylase,uracil.phosphoribosyltransferase" + ]), + docker = snippy_gene_query_docker_image + } + } # Removing C.albicans subworkflow for now as current workflows not designed for diploid assembly # if (merlin_tag == "Candida albicans") { # if (!assembly_only && !ont_data) { @@ -989,10 +985,10 @@ workflow merlin_magic { # snippy variants String snippy_variants_reference_genome = select_first([snippy_cauris.snippy_variants_reference_genome, snippy_cauris_ont.snippy_variants_reference_genome, snippy_afumigatus.snippy_variants_reference_genome, snippy_crypto.snippy_variants_reference_genome, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) String 
snippy_variants_version = select_first([snippy_cauris.snippy_variants_version, snippy_cauris_ont.snippy_variants_version,snippy_afumigatus.snippy_variants_version, snippy_crypto.snippy_variants_version, "No matching taxon detected"]) - String snippy_variants_query = select_first([snippy_gene_query_cauris.snippy_variants_query, snippy_gene_query_cauris_ont.snippy_variants_query, snippy_gene_query_afumigatus.snippy_variants_query, snippy_gene_query_crypto.snippy_variants_query, "No matching taxon detected"]) - String snippy_variants_query_check = select_first([snippy_gene_query_cauris.snippy_variants_query_check, snippy_gene_query_cauris_ont.snippy_variants_query_check, snippy_gene_query_afumigatus.snippy_variants_query_check, snippy_gene_query_crypto.snippy_variants_query_check, "No matching taxon detected"]) - String snippy_variants_hits = select_first([snippy_gene_query_cauris.snippy_variants_hits, snippy_gene_query_cauris_ont.snippy_variants_hits, snippy_gene_query_afumigatus.snippy_variants_hits, snippy_gene_query_crypto.snippy_variants_hits, "No matching taxon detected"]) - String snippy_variants_gene_query_results = select_first([snippy_gene_query_cauris.snippy_variants_gene_query_results, snippy_gene_query_cauris_ont.snippy_variants_gene_query_results, snippy_gene_query_afumigatus.snippy_variants_gene_query_results, snippy_gene_query_crypto.snippy_variants_gene_query_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_query = select_first([snippy_gene_query_cauris.snippy_variants_query, snippy_gene_query_afumigatus.snippy_variants_query, snippy_gene_query_crypto.snippy_variants_query, "No matching taxon detected"]) + String snippy_variants_query_check = select_first([snippy_gene_query_cauris.snippy_variants_query_check, snippy_gene_query_afumigatus.snippy_variants_query_check, snippy_gene_query_crypto.snippy_variants_query_check, "No matching taxon detected"]) + String snippy_variants_hits = 
select_first([snippy_gene_query_cauris.snippy_variants_hits, snippy_gene_query_afumigatus.snippy_variants_hits, snippy_gene_query_crypto.snippy_variants_hits, "No matching taxon detected"]) + String snippy_variants_gene_query_results = select_first([snippy_gene_query_cauris.snippy_variants_gene_query_results, snippy_gene_query_afumigatus.snippy_variants_gene_query_results, snippy_gene_query_crypto.snippy_variants_gene_query_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) String snippy_variants_outdir_tarball = select_first([snippy_cauris.snippy_variants_outdir_tarball, snippy_cauris_ont.snippy_variants_outdir_tarball, snippy_afumigatus.snippy_variants_outdir_tarball, snippy_crypto.snippy_variants_outdir_tarball, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) String snippy_variants_results = select_first([snippy_cauris.snippy_variants_results, snippy_cauris_ont.snippy_variants_results,snippy_afumigatus.snippy_variants_results, snippy_crypto.snippy_variants_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) String snippy_variants_bam = select_first([snippy_cauris.snippy_variants_bam, snippy_cauris_ont.snippy_variants_bam, snippy_afumigatus.snippy_variants_bam, snippy_crypto.snippy_variants_bam, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) From 08a92fb73f27de2a6f6dc53c9a4a446098f2ca1b Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 8 Oct 2024 13:57:39 +0000 Subject: [PATCH 08/13] Added read qc and assembly qc --- workflows/theiaeuk/wf_theiaeuk_ont.wdl | 78 ++++++++++++++++++++++++-- 1 file changed, 74 insertions(+), 4 deletions(-) diff --git a/workflows/theiaeuk/wf_theiaeuk_ont.wdl b/workflows/theiaeuk/wf_theiaeuk_ont.wdl index 224473b88..a4a721084 100644 --- a/workflows/theiaeuk/wf_theiaeuk_ont.wdl +++ b/workflows/theiaeuk/wf_theiaeuk_ont.wdl @@ -5,6 +5,9 @@ import "../../tasks/assembly/task_dragonflye.wdl" as dragonflye import 
"../../tasks/taxon_id/task_gambit.wdl" as gambit import "../utilities/wf_merlin_magic.wdl" as merlin_magic_workflow import "../../tasks/task_versioning.wdl" as versioning +import "../../tasks/quality_control/basic_statistics/task_nanoplot.wdl" as nanoplot_task +import "../../tasks/quality_control/basic_statistics/task_quast.wdl" as quast_task +import "../../tasks/quality_control/advanced_metrics/task_busco.wdl" as busco_task workflow theiaeuk_ont { input { @@ -22,6 +25,10 @@ workflow theiaeuk_ont { File gambit_db_signatures = "gs://theiagen-public-files-rp/terra/theiaeuk-files/gambit/221130-theiagen-fungal-v0.2.h5" } + call versioning.version_capture { + input: + } + call read_qc.read_QC_trim_ont as read_qc { input: read1 = read1, @@ -42,6 +49,32 @@ workflow theiaeuk_ont { memory = dragonflye_memory, disk_size = dragonflye_disk_size } + #call quast on the assembly + call quast_task.quast { + input: + assembly = dragonflye.assembly_fasta, + samplename = samplename + } + # nanoplot for raw reads + call nanoplot_task.nanoplot as nanoplot_raw { + input: + read1 = read1, + samplename = samplename, + est_genome_length = select_first([genome_length, quast.genome_length]) + } + # nanoplot for cleaned reads + call nanoplot_task.nanoplot as nanoplot_clean { + input: + read1 = read_qc.read1_clean, + samplename = samplename, + est_genome_length = select_first([genome_length, quast.genome_length]) + } + # busco on the assembly + call busco_task.busco { + input: + assembly = dragonflye.assembly_fasta, + samplename = samplename + } call gambit.gambit { input: @@ -61,10 +94,6 @@ workflow theiaeuk_ont { theiaeuk = true } - call versioning.version_capture { - input: - } - output { # Version Capture String theiaeuk_ont_version = version_capture.phb_version @@ -77,6 +106,47 @@ workflow theiaeuk_ont { File assembly_fasta = dragonflye.assembly_fasta File contigs_gfa = dragonflye.contigs_gfa String dragonflye_version = dragonflye.dragonflye_version + # Read QC - nanoplot raw outputs + 
File? nanoplot_html_raw = nanoplot_raw.nanoplot_html + File? nanoplot_tsv_raw = nanoplot_raw.nanoplot_tsv + Int? nanoplot_num_reads_raw1 = nanoplot_raw.num_reads + Float? nanoplot_r1_median_readlength_raw = nanoplot_raw.median_readlength + Float? nanoplot_r1_mean_readlength_raw = nanoplot_raw.mean_readlength + Float? nanoplot_r1_stdev_readlength_raw = nanoplot_raw.stdev_readlength + Float? nanoplot_r1_n50_raw = nanoplot_raw.n50 + Float? nanoplot_r1_mean_q_raw = nanoplot_raw.mean_q + Float? nanoplot_r1_median_q_raw = nanoplot_raw.median_q + Float? nanoplot_r1_est_coverage_raw = nanoplot_raw.est_coverage + # Read QC - nanoplot clean outputs + File? nanoplot_html_clean = nanoplot_clean.nanoplot_html + File? nanoplot_tsv_clean = nanoplot_clean.nanoplot_tsv + Int? nanoplot_num_reads_clean1 = nanoplot_clean.num_reads + Float? nanoplot_r1_median_readlength_clean = nanoplot_clean.median_readlength + Float? nanoplot_r1_mean_readlength_clean = nanoplot_clean.mean_readlength + Float? nanoplot_r1_stdev_readlength_clean = nanoplot_clean.stdev_readlength + Float? nanoplot_r1_n50_clean = nanoplot_clean.n50 + Float? nanoplot_r1_mean_q_clean = nanoplot_clean.mean_q + Float? nanoplot_r1_median_q_clean = nanoplot_clean.median_q + Float? nanoplot_r1_est_coverage_clean = nanoplot_clean.est_coverage + # Read QC - nanoplot general outputs + String? nanoplot_version = nanoplot_raw.nanoplot_version + String? nanoplot_docker = nanoplot_raw.nanoplot_docker + # Assembly QC - quast outputs + File? quast_report = quast.quast_report + String? quast_version = quast.version + Int? assembly_length = quast.genome_length + Int? number_contigs = quast.number_contigs + Int? n50_value = quast.n50_value + Float? quast_gc_percent = quast.gc_percent + # Assembly QC - nanoplot outputs + Float? est_coverage_raw = nanoplot_raw.est_coverage + Float? est_coverage_clean = nanoplot_clean.est_coverage + # Assembly QC - busco outputs + String? busco_version = busco.busco_version + String? 
busco_docker = busco.busco_docker + String? busco_database = busco.busco_database + String? busco_results = busco.busco_results + File? busco_report = busco.busco_report # Gambit outputs File gambit_report_file = gambit.gambit_report_file File gambit_closest_genomes_file = gambit.gambit_closest_genomes_file From 5cc94078de1b7d7ad4a0cae8da610eb43a198e94 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 8 Oct 2024 16:56:24 +0000 Subject: [PATCH 09/13] updated md5 sum --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index d2cedb29b..bae74e2e5 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -633,7 +633,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl md5sum: 6d9dd969e2144ca23f2a0e101e6b6966 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: 8ab95440118d06dd3c07765a19e876e7 + md5sum: ba96a542bb0896569fee6136f712fdd1 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index d19f1319e..3a891aaa8 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -526,7 +526,7 @@ - path: miniwdl_run/wdl/tasks/gene_typing/drug_resistance/task_resfinder.wdl md5sum: 27528633723303b462d095b642649453 - path: miniwdl_run/wdl/tasks/gene_typing/variant_detection/task_snippy_variants.wdl - md5sum: 284ce680b52e7e1c1753537b344fa161 + md5sum: 3b9e04569d7e856dcc649b7726b306b7 - 
path: miniwdl_run/wdl/tasks/quality_control/read_filtering/task_bbduk.wdl md5sum: aec6ef024d6dff31723f44290f6b9cf5 - path: miniwdl_run/wdl/tasks/quality_control/advanced_metrics/task_busco.wdl @@ -596,7 +596,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 5aa25e4fad466f92c96a7c138aca0d20 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: 8ab95440118d06dd3c07765a19e876e7 + md5sum: ba96a542bb0896569fee6136f712fdd1 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: d11bfe33fdd96eab28892be5a01c1c7d - path: miniwdl_run/workflow.log From 021dbb71640af8179eed7bc22b02857e0b7e7c98 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Tue, 8 Oct 2024 18:21:54 +0000 Subject: [PATCH 10/13] Initial commits for theiaeuk CI --- tests/config/pytest_filter.yml | 10 +++++++++ tests/inputs/theiaeuk/wf_theiaeuk_ont.json | 17 ++++++++++++++ .../theiaeuk/wf_theiaeuk_ont_cromwell.json | 7 ++++++ .../theiaeuk/test_wf_theiaeuk_ont.yml | 22 +++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 tests/inputs/theiaeuk/wf_theiaeuk_ont.json create mode 100644 tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json create mode 100644 tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml diff --git a/tests/config/pytest_filter.yml b/tests/config/pytest_filter.yml index 5effea33a..838c21d33 100644 --- a/tests/config/pytest_filter.yml +++ b/tests/config/pytest_filter.yml @@ -171,3 +171,13 @@ wf_theiaprok_illumina_se: - tasks/species_typing/multi/task_ts_mlst.wdl - tasks/taxon_id/contamination/task_kraken2.wdl - tasks/taxon_id/contamination/task_midas.wdl + +# wf_theiaeuk_ont: +# - workflows/theiaeuk/wf_theiaeuk_ont.wdl +# - tasks/assembly/task_dragonflye.wdl +# - tasks/taxon_id/task_gambit.wdl +# - workflows/utilities/wf_merlin_magic.wdl +# - tasks/quality_control/basic_statistics/task_nanoplot.wdl +# - tasks/quality_control/basic_statistics/task_quast.wdl +# - 
tasks/quality_control/advanced_metrics/task_busco.wdl +# - tasks/task_versioning.wdl \ No newline at end of file diff --git a/tests/inputs/theiaeuk/wf_theiaeuk_ont.json b/tests/inputs/theiaeuk/wf_theiaeuk_ont.json new file mode 100644 index 000000000..e027c6387 --- /dev/null +++ b/tests/inputs/theiaeuk/wf_theiaeuk_ont.json @@ -0,0 +1,17 @@ +{ + "theiaeuk_ont.samplename": "test", + "theiaeuk_ont.read1": "tests/data/theiacov/fastqs/ont/ont.fastq.gz", + "theiaeuk_ont.dragonflye.assembler": "flye", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade1": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade1_annotated": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade2": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade2_annotated": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade3": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade3_annotated": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade4": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade4_annotated": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade5": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.merlin_magic.cladetyper_ref_clade5_annotated": "./tests/inputs/empty-for-test.txt", + "theiaeuk_ont.gambit.gambit_db_signatures": "./tests/inputs/completely-empty-for-test.txt", + "theiaeuk_ont.gambit.gambit_db_genomes": "./tests/inputs/completely-empty-for-test.txt" +} \ No newline at end of file diff --git a/tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json b/tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json new file mode 100644 index 000000000..0f9c135b5 --- /dev/null +++ b/tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json @@ -0,0 +1,7 @@ +{ + "theiaeuk_ont.samplename": "test", + "theiaeuk_ont.read1": 
"tests/data/theiacov/fastqs/ont/ont.fastq.gz", + "theiaeuk_ont.dragonflye.assembler": "flye", + "theiaeuk_ont.gambit.gambit_db_signatures": "./tests/inputs/completely-empty-for-test.txt", + "theiaeuk_ont.gambit.gambit_db_genomes": "./tests/inputs/completely-empty-for-test.txt" +} \ No newline at end of file diff --git a/tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml b/tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml new file mode 100644 index 000000000..e070eed9d --- /dev/null +++ b/tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml @@ -0,0 +1,22 @@ +- name: theiaeuk_ont_cromwell + command: cromwell run -i ./tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json -m metadata.json ./workflows/theiaeuk/wf_theiaeuk_ont.wdl + tags: + - wf_theiaeuk_ont + - wf_theiaeuk_ont_cromwell + files: + - path: log.err + - path: log.out + contains: ["workflow finished with status 'Succeeded'", "theiaeuk_ont", "Done"] + - path: metadata.json + contains: ["outputs", "theiaeuk_ont", "Succeeded"] + +- name: theiaeuk_ont_miniwdl + command: miniwdl run -i ./tests/inputs/theiaeuk/wf_theiaeuk_ont.json -d miniwdl_run/. 
--verbose --error-json ./workflows/theiaeuk/wf_theiaeuk_ont.wdl + tags: + - wf_theiaeuk_ont + - wf_theiaeuk_ont_miniwdl + files: + - path: miniwdl_run/call-someTask/command + md5sum: expectedMd5sumHere + - path: miniwdl_run/call-someTask/inputs.json + contains: ["expectedContent", "moreExpectedContent"] \ No newline at end of file From 767f33a57039a7455ec5626958432d11bb81bef0 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Wed, 9 Oct 2024 18:39:25 +0000 Subject: [PATCH 11/13] Removed unnecessary tests --- tests/config/pytest_filter.yml | 12 +--------- tests/inputs/theiaeuk/wf_theiaeuk_ont.json | 17 -------------- .../theiaeuk/wf_theiaeuk_ont_cromwell.json | 7 ------ .../theiaeuk/test_wf_theiaeuk_ont.yml | 22 ------------------- 4 files changed, 1 insertion(+), 57 deletions(-) delete mode 100644 tests/inputs/theiaeuk/wf_theiaeuk_ont.json delete mode 100644 tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json delete mode 100644 tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml diff --git a/tests/config/pytest_filter.yml b/tests/config/pytest_filter.yml index 838c21d33..b98a79f7b 100644 --- a/tests/config/pytest_filter.yml +++ b/tests/config/pytest_filter.yml @@ -170,14 +170,4 @@ wf_theiaprok_illumina_se: - tasks/species_typing/mycobacterium/task_tbprofiler.wdl - tasks/species_typing/multi/task_ts_mlst.wdl - tasks/taxon_id/contamination/task_kraken2.wdl - - tasks/taxon_id/contamination/task_midas.wdl - -# wf_theiaeuk_ont: -# - workflows/theiaeuk/wf_theiaeuk_ont.wdl -# - tasks/assembly/task_dragonflye.wdl -# - tasks/taxon_id/task_gambit.wdl -# - workflows/utilities/wf_merlin_magic.wdl -# - tasks/quality_control/basic_statistics/task_nanoplot.wdl -# - tasks/quality_control/basic_statistics/task_quast.wdl -# - tasks/quality_control/advanced_metrics/task_busco.wdl -# - tasks/task_versioning.wdl \ No newline at end of file + - tasks/taxon_id/contamination/task_midas.wdl \ No newline at end of file diff --git a/tests/inputs/theiaeuk/wf_theiaeuk_ont.json 
b/tests/inputs/theiaeuk/wf_theiaeuk_ont.json deleted file mode 100644 index e027c6387..000000000 --- a/tests/inputs/theiaeuk/wf_theiaeuk_ont.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "theiaeuk_ont.samplename": "test", - "theiaeuk_ont.read1": "tests/data/theiacov/fastqs/ont/ont.fastq.gz", - "theiaeuk_ont.dragonflye.assembler": "flye", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade1": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade1_annotated": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade2": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade2_annotated": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade3": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade3_annotated": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade4": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade4_annotated": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade5": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.merlin_magic.cladetyper_ref_clade5_annotated": "./tests/inputs/empty-for-test.txt", - "theiaeuk_ont.gambit.gambit_db_signatures": "./tests/inputs/completely-empty-for-test.txt", - "theiaeuk_ont.gambit.gambit_db_genomes": "./tests/inputs/completely-empty-for-test.txt" -} \ No newline at end of file diff --git a/tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json b/tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json deleted file mode 100644 index 0f9c135b5..000000000 --- a/tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "theiaeuk_ont.samplename": "test", - "theiaeuk_ont.read1": "tests/data/theiacov/fastqs/ont/ont.fastq.gz", - "theiaeuk_ont.dragonflye.assembler": "flye", - "theiaeuk_ont.gambit.gambit_db_signatures": 
"./tests/inputs/completely-empty-for-test.txt", - "theiaeuk_ont.gambit.gambit_db_genomes": "./tests/inputs/completely-empty-for-test.txt" -} \ No newline at end of file diff --git a/tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml b/tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml deleted file mode 100644 index e070eed9d..000000000 --- a/tests/workflows/theiaeuk/test_wf_theiaeuk_ont.yml +++ /dev/null @@ -1,22 +0,0 @@ -- name: theiaeuk_ont_cromwell - command: cromwell run -i ./tests/inputs/theiaeuk/wf_theiaeuk_ont_cromwell.json -m metadata.json ./workflows/theiaeuk/wf_theiaeuk_ont.wdl - tags: - - wf_theiaeuk_ont - - wf_theiaeuk_ont_cromwell - files: - - path: log.err - - path: log.out - contains: ["workflow finished with status 'Succeeded'", "theiaeuk_ont", "Done"] - - path: metadata.json - contains: ["outputs", "theiaeuk_ont", "Succeeded"] - -- name: theiaeuk_ont_miniwdl - command: miniwdl run -i ./tests/inputs/theiaeuk/wf_theiaeuk_ont.json -d miniwdl_run/. --verbose --error-json ./workflows/theiaeuk/wf_theiaeuk_ont.wdl - tags: - - wf_theiaeuk_ont - - wf_theiaeuk_ont_miniwdl - files: - - path: miniwdl_run/call-someTask/command - md5sum: expectedMd5sumHere - - path: miniwdl_run/call-someTask/inputs.json - contains: ["expectedContent", "moreExpectedContent"] \ No newline at end of file From d46d58f3839f8a89b181de7c7ab88ba8c17922bd Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Fri, 18 Oct 2024 18:23:16 +0000 Subject: [PATCH 12/13] Added ont support for A. fumigatus & C. 
neoformans --- workflows/utilities/wf_merlin_magic.wdl | 61 +++++++++++++++++++------ 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/workflows/utilities/wf_merlin_magic.wdl b/workflows/utilities/wf_merlin_magic.wdl index f010353f3..b02e978b1 100644 --- a/workflows/utilities/wf_merlin_magic.wdl +++ b/workflows/utilities/wf_merlin_magic.wdl @@ -697,16 +697,32 @@ workflow merlin_magic { maxsoft = snippy_maxsoft, docker = snippy_variants_docker_image } + } + + if (assembly_only && ont_data) { + call snippy.snippy_variants as snippy_afumigatus_ont { + input: + reference_genome_file = snippy_reference_afumigatus, + assembly_fasta = assembly, + samplename = samplename, + map_qual = snippy_map_qual, + base_quality = snippy_base_quality, + min_coverage = snippy_min_coverage, + min_frac = snippy_min_frac, + min_quality = snippy_min_quality, + maxsoft = snippy_maxsoft, + docker = snippy_variants_docker_image + } + } call snippy_gene_query.snippy_gene_query as snippy_gene_query_afumigatus { input: samplename = samplename, - snippy_variants_results = snippy_afumigatus.snippy_variants_results, + snippy_variants_results = select_first([snippy_afumigatus.snippy_variants_results, snippy_afumigatus_ont.snippy_variants_results]), reference = snippy_reference_afumigatus, query_gene = select_first([snippy_query_gene, "Cyp51A,HapE,AFUA_4G08340"]), # AFUA_4G08340 is COX10 according to MARDy docker = snippy_gene_query_docker_image } } - } if (merlin_tag == "Cryptococcus neoformans") { if (!assembly_only && !ont_data) { call snippy.snippy_variants as snippy_crypto { @@ -723,16 +739,31 @@ workflow merlin_magic { maxsoft = snippy_maxsoft, docker = snippy_variants_docker_image } + } + if (assembly_only && ont_data) { + call snippy.snippy_variants as snippy_crypto_ont { + input: + reference_genome_file = snippy_reference_cryptoneo, + assembly_fasta = assembly, + samplename = samplename, + map_qual = snippy_map_qual, + base_quality = snippy_base_quality, + min_coverage = 
snippy_min_coverage, + min_frac = snippy_min_frac, + min_quality = snippy_min_quality, + maxsoft = snippy_maxsoft, + docker = snippy_variants_docker_image + } + } call snippy_gene_query.snippy_gene_query as snippy_gene_query_crypto { input: samplename = samplename, - snippy_variants_results = snippy_crypto.snippy_variants_results, + snippy_variants_results = select_first([snippy_crypto.snippy_variants_results, snippy_crypto_ont.snippy_variants_results]), reference = snippy_reference_cryptoneo, query_gene = select_first([snippy_query_gene, "CNA00300"]), # CNA00300 is ERG11 for this reference genome docker = snippy_gene_query_docker_image } } - } } output { # theiaprok @@ -983,20 +1014,20 @@ workflow merlin_magic { String? cladetyper_docker_image = cladetyper.gambit_cladetyper_docker_image String? cladetype_annotated_ref = cladetyper.clade_spec_ref # snippy variants - String snippy_variants_reference_genome = select_first([snippy_cauris.snippy_variants_reference_genome, snippy_cauris_ont.snippy_variants_reference_genome, snippy_afumigatus.snippy_variants_reference_genome, snippy_crypto.snippy_variants_reference_genome, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_version = select_first([snippy_cauris.snippy_variants_version, snippy_cauris_ont.snippy_variants_version,snippy_afumigatus.snippy_variants_version, snippy_crypto.snippy_variants_version, "No matching taxon detected"]) + String snippy_variants_reference_genome = select_first([snippy_cauris.snippy_variants_reference_genome, snippy_cauris_ont.snippy_variants_reference_genome, snippy_afumigatus.snippy_variants_reference_genome, snippy_afumigatus_ont.snippy_variants_reference_genome, snippy_crypto.snippy_variants_reference_genome, snippy_crypto_ont.snippy_variants_reference_genome, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_version = select_first([snippy_cauris.snippy_variants_version, 
snippy_cauris_ont.snippy_variants_version,snippy_afumigatus.snippy_variants_version, snippy_afumigatus_ont.snippy_variants_version, snippy_crypto.snippy_variants_version, snippy_crypto_ont.snippy_variants_version, "No matching taxon detected"]) String snippy_variants_query = select_first([snippy_gene_query_cauris.snippy_variants_query, snippy_gene_query_afumigatus.snippy_variants_query, snippy_gene_query_crypto.snippy_variants_query, "No matching taxon detected"]) String snippy_variants_query_check = select_first([snippy_gene_query_cauris.snippy_variants_query_check, snippy_gene_query_afumigatus.snippy_variants_query_check, snippy_gene_query_crypto.snippy_variants_query_check, "No matching taxon detected"]) String snippy_variants_hits = select_first([snippy_gene_query_cauris.snippy_variants_hits, snippy_gene_query_afumigatus.snippy_variants_hits, snippy_gene_query_crypto.snippy_variants_hits, "No matching taxon detected"]) String snippy_variants_gene_query_results = select_first([snippy_gene_query_cauris.snippy_variants_gene_query_results, snippy_gene_query_afumigatus.snippy_variants_gene_query_results, snippy_gene_query_crypto.snippy_variants_gene_query_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_outdir_tarball = select_first([snippy_cauris.snippy_variants_outdir_tarball, snippy_cauris_ont.snippy_variants_outdir_tarball, snippy_afumigatus.snippy_variants_outdir_tarball, snippy_crypto.snippy_variants_outdir_tarball, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_results = select_first([snippy_cauris.snippy_variants_results, snippy_cauris_ont.snippy_variants_results,snippy_afumigatus.snippy_variants_results, snippy_crypto.snippy_variants_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_bam = select_first([snippy_cauris.snippy_variants_bam, snippy_cauris_ont.snippy_variants_bam, 
snippy_afumigatus.snippy_variants_bam, snippy_crypto.snippy_variants_bam, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_bai = select_first([snippy_cauris.snippy_variants_bai, snippy_cauris_ont.snippy_variants_bai, snippy_afumigatus.snippy_variants_bai, snippy_crypto.snippy_variants_bai, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_summary = select_first([snippy_cauris.snippy_variants_summary, snippy_cauris_ont.snippy_variants_summary, snippy_afumigatus.snippy_variants_summary, snippy_crypto.snippy_variants_summary, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_num_reads_aligned = select_first([snippy_cauris.snippy_variants_num_reads_aligned, snippy_cauris_ont.snippy_variants_num_reads_aligned, snippy_afumigatus.snippy_variants_num_reads_aligned, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) - String snippy_variants_coverage_tsv = select_first([snippy_cauris.snippy_variants_coverage_tsv, snippy_cauris_ont.snippy_variants_coverage_tsv, snippy_afumigatus.snippy_variants_coverage_tsv, snippy_crypto.snippy_variants_coverage_tsv, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) - String snippy_variants_num_variants = select_first([snippy_cauris.snippy_variants_num_variants, snippy_cauris_ont.snippy_variants_num_variants, snippy_afumigatus.snippy_variants_num_variants, snippy_crypto.snippy_variants_num_reads_aligned, "No matching taxon detected"]) - String snippy_variants_percent_ref_coverage = select_first([snippy_cauris.snippy_variants_percent_ref_coverage, snippy_cauris_ont.snippy_variants_percent_ref_coverage, snippy_afumigatus.snippy_variants_percent_ref_coverage, snippy_crypto.snippy_variants_percent_ref_coverage, "No matching taxon detected"]) + String snippy_variants_outdir_tarball = select_first([snippy_cauris.snippy_variants_outdir_tarball, 
snippy_cauris_ont.snippy_variants_outdir_tarball, snippy_afumigatus.snippy_variants_outdir_tarball, snippy_afumigatus_ont.snippy_variants_outdir_tarball, snippy_crypto.snippy_variants_outdir_tarball, snippy_crypto_ont.snippy_variants_outdir_tarball, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_results = select_first([snippy_cauris.snippy_variants_results, snippy_cauris_ont.snippy_variants_results,snippy_afumigatus.snippy_variants_results, snippy_afumigatus_ont.snippy_variants_results, snippy_crypto.snippy_variants_results, snippy_crypto_ont.snippy_variants_results, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_bam = select_first([snippy_cauris.snippy_variants_bam, snippy_cauris_ont.snippy_variants_bam, snippy_afumigatus.snippy_variants_bam, snippy_afumigatus_ont.snippy_variants_bam, snippy_crypto.snippy_variants_bam, snippy_crypto_ont.snippy_variants_bam, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_bai = select_first([snippy_cauris.snippy_variants_bai, snippy_cauris_ont.snippy_variants_bai, snippy_afumigatus.snippy_variants_bai, snippy_afumigatus_ont.snippy_variants_bai, snippy_crypto.snippy_variants_bai, snippy_crypto_ont.snippy_variants_bai, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_summary = select_first([snippy_cauris.snippy_variants_summary, snippy_cauris_ont.snippy_variants_summary, snippy_afumigatus.snippy_variants_summary, snippy_afumigatus_ont.snippy_variants_summary, snippy_crypto.snippy_variants_summary, snippy_crypto_ont.snippy_variants_summary, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_num_reads_aligned = select_first([snippy_cauris.snippy_variants_num_reads_aligned, snippy_cauris_ont.snippy_variants_num_reads_aligned, snippy_afumigatus.snippy_variants_num_reads_aligned, 
snippy_afumigatus_ont.snippy_variants_num_reads_aligned, snippy_crypto.snippy_variants_num_reads_aligned, snippy_crypto_ont.snippy_variants_num_reads_aligned, "No matching taxon detected"]) + String snippy_variants_coverage_tsv = select_first([snippy_cauris.snippy_variants_coverage_tsv, snippy_cauris_ont.snippy_variants_coverage_tsv, snippy_afumigatus.snippy_variants_coverage_tsv, snippy_afumigatus_ont.snippy_variants_coverage_tsv, snippy_crypto.snippy_variants_coverage_tsv, snippy_crypto_ont.snippy_variants_coverage_tsv, "gs://theiagen-public-files/terra/theiaeuk_files/no_match_detected.txt"]) + String snippy_variants_num_variants = select_first([snippy_cauris.snippy_variants_num_variants, snippy_cauris_ont.snippy_variants_num_variants, snippy_afumigatus.snippy_variants_num_variants, snippy_afumigatus_ont.snippy_variants_num_variants, snippy_crypto.snippy_variants_num_reads_aligned, snippy_crypto_ont.snippy_variants_num_variants, "No matching taxon detected"]) + String snippy_variants_percent_ref_coverage = select_first([snippy_cauris.snippy_variants_percent_ref_coverage, snippy_cauris_ont.snippy_variants_percent_ref_coverage, snippy_afumigatus.snippy_variants_percent_ref_coverage, snippy_afumigatus_ont.snippy_variants_percent_ref_coverage, snippy_crypto.snippy_variants_percent_ref_coverage, snippy_crypto_ont.snippy_variants_percent_ref_coverage, "No matching taxon detected"]) } } \ No newline at end of file From e23e9ab9255fb4ca3a5614162751cfd3d683bf29 Mon Sep 17 00:00:00 2001 From: Michal-Babins Date: Fri, 18 Oct 2024 18:41:54 +0000 Subject: [PATCH 13/13] updated md5 sum for merlin magic --- tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml | 2 +- tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml index bae74e2e5..fc163f307 100644 --- 
a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_pe.yml @@ -633,7 +633,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_pe.wdl md5sum: 6d9dd969e2144ca23f2a0e101e6b6966 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ba96a542bb0896569fee6136f712fdd1 + md5sum: 75f2c3825fcce2931d7c37c3bdbd74b8 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_pe.wdl contains: ["version", "QC", "output"] - path: miniwdl_run/workflow.log diff --git a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml index 3a891aaa8..a23424aa7 100644 --- a/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml +++ b/tests/workflows/theiaprok/test_wf_theiaprok_illumina_se.yml @@ -596,7 +596,7 @@ - path: miniwdl_run/wdl/workflows/theiaprok/wf_theiaprok_illumina_se.wdl md5sum: 5aa25e4fad466f92c96a7c138aca0d20 - path: miniwdl_run/wdl/workflows/utilities/wf_merlin_magic.wdl - md5sum: ba96a542bb0896569fee6136f712fdd1 + md5sum: 75f2c3825fcce2931d7c37c3bdbd74b8 - path: miniwdl_run/wdl/workflows/utilities/wf_read_QC_trim_se.wdl md5sum: d11bfe33fdd96eab28892be5a01c1c7d - path: miniwdl_run/workflow.log