diff --git a/tasks/phylogenetic_inference/task_iqtree2.wdl b/tasks/phylogenetic_inference/task_iqtree2.wdl index 28ad52022..362dc40c2 100644 --- a/tasks/phylogenetic_inference/task_iqtree2.wdl +++ b/tasks/phylogenetic_inference/task_iqtree2.wdl @@ -8,7 +8,6 @@ task iqtree2 { Int iqtree2_bootstraps = 1000 # Ultrafast bootstrap replicates Int alrt = 1000 # SH-like approximate likelihood ratio test (SH-aLRT) replicates String? iqtree2_opts - Boolean? core_genome String docker = "us-docker.pkg.dev/general-theiagen/staphb/iqtree2:2.1.2" Int disk_size = 100 @@ -22,30 +21,19 @@ task iqtree2 { # multiple sed statements to get down to a string that is just "version 2.1.2" iqtree2 --version | grep version | sed 's|.*version|version|;s| COVID-edition for Linux.*||' | tee VERSION - # if iqtree2_model is set by user, use that String input + # check if iqtree2_model input is set and output for sanity if [ -n "~{iqtree2_model}" ]; then - echo "user provided an iqtree2_model string input, will use this for running iqtree2" + echo "DEBUG: User provided iqtree2_model ~{iqtree2_model}, will use this for running iqtree2" IQTREE2_MODEL="~{iqtree2_model}" else - echo "User did not supply an iqtree2_model input, setting based on boolean core_genome" - - # if iqtree2_model is NOT set by user, then set iqtree2_model based on boolean core_genome - # if core_genome is set to TRUE, then use model "GTR+G" - if [[ "~{core_genome}" == true ]]; then - echo "core_genome boolean was set to true, so using iqtree2_model GTR+G" - IQTREE2_MODEL="GTR+G" - elif [ "~{core_genome}" == false ]; then - echo "core_genome boolean was set to false, so using iqtree2_model GTR+I+G" - IQTREE2_MODEL="GTR+I+G" - else - echo "iqtree2_model was not specified by user AND core_genome was not specified, so we will use the default setting from iqtree2" - fi + echo "DEBUG: User did not supply an iqtree2_model input, will use iqtree2's model finder" fi # sanity check - echo "IQTREE2_MODEL is set to:" ${IQTREE2_MODEL} + echo "DEBUG: IQTREE2_MODEL is set to: " ${IQTREE2_MODEL} - numGenomes=`grep -o '>' ~{alignment} | wc -l` + # make sure there are more than 3 genomes in the dataset + numGenomes=$(grep -o '>' ~{alignment} | wc -l) if [ "$numGenomes" -gt 3 ]; then cp ~{alignment} ./msa.fasta @@ -56,6 +44,7 @@ task iqtree2 { # -bb : number of bootstrap replicates # -alrt : number of replicates to perform SH-like approximate likelihood ration test if [[ -v IQTREE2_MODEL ]] ; then # iqtree2 model set; use -m tag + echo "DEBUG: running iqtree2 with the -m flag which is used to provide a model; user-specified " ${IQTREE2_MODEL} iqtree2 \ -nt AUTO \ -s msa.fasta \ @@ -67,7 +56,7 @@ task iqtree2 { echo ${IQTREE2_MODEL} | tee IQTREE2_MODEL.TXT else # iqtree model is not set; do not use -m tag - echo "running iqtree2 without the -m flag for providing a model. Will default to iqtree2 default; for DNA this is HKY+F" + echo "DEBUG: running iqtree2 without the -m flag which is used to provide a model. Will default to iqtree2 default (Model Finder)" iqtree2 \ -nt AUTO \ -s msa.fasta \ @@ -82,6 +71,8 @@ task iqtree2 { # rename the final output newick file cp -v msa.fasta.contree ~{cluster_name}_iqtree.nwk + else + echo "DEBUG: not enough genomes provided; more than 3 are required to run iqtree2" fi >>> output { diff --git a/workflows/phylogenetics/wf_snippy_tree.wdl b/workflows/phylogenetics/wf_snippy_tree.wdl index 09a5842c8..2b5aec4be 100644 --- a/workflows/phylogenetics/wf_snippy_tree.wdl +++ b/workflows/phylogenetics/wf_snippy_tree.wdl @@ -19,19 +19,24 @@ workflow snippy_tree_wf { Array[String] samplenames File reference_genome_file Boolean use_gubbins = true - Boolean? core_genome + Boolean core_genome = true + String? data_summary_terra_project String? data_summary_terra_workspace String? data_summary_terra_table String? data_summary_column_names # comma delimited + + # the following parameters are exposed to allow modification in snippy_streamline String? snippy_core_docker Int? snippy_core_cpu Int? snippy_core_disk_size Int? snippy_core_memory + Int? gubbins_disk_size Int? gubbins_memory Int? gubbins_cpu String? gubbins_docker + Int? iqtree2_cpu Int? iqtree2_memory Int? iqtree2_disk_size @@ -39,7 +44,9 @@ workflow snippy_tree_wf { String? iqtree2_docker Int? iqtree2_bootstraps String? iqtree2_model + String? snp_dists_docker + Int? snp_sites_cpus Int? snp_sites_disk_size Int? snp_sites_memory @@ -67,10 +74,7 @@ workflow snippy_tree_wf { cpu = gubbins_cpu } } - # select first here is so that the optional boolean input 'core_genome' is coerced into being required - # if user does not specify core_genome or they specify false, this block will be skipped - # if user DOES specify core_genome as true, then the snp_sites task will be called/utilized - if (select_first([core_genome, false])) { + if (core_genome) { call snp_sites_task.snp_sites as snp_sites { input: # hardcoding some of the snp-sites optional outputs to false, @@ -92,14 +96,13 @@ workflow snippy_tree_wf { input: alignment = select_first([snp_sites.snp_sites_multifasta, gubbins.gubbins_polymorphic_fasta, snippy_core.snippy_full_alignment_clean]), cluster_name = tree_name, + iqtree2_model = iqtree2_model, + iqtree2_opts = iqtree2_opts, + iqtree2_bootstraps = iqtree2_bootstraps, docker = iqtree2_docker, cpu = iqtree2_cpu, memory = iqtree2_memory, - disk_size = iqtree2_disk_size, - iqtree2_model = iqtree2_model, - core_genome = core_genome, - iqtree2_opts = iqtree2_opts, - iqtree2_bootstraps = iqtree2_bootstraps + disk_size = iqtree2_disk_size } call snp_dists_task.snp_dists { input: