Skip to content

Commit

Permalink
Merge pull request #144 from theiagen/smw-core-genome-dev
Browse files Browse the repository at this point in the history
Snippy_Tree `core_genome` default value
  • Loading branch information
kapsakcj authored Aug 17, 2023
2 parents cfe1f29 + 6237f63 commit e018118
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 29 deletions.
29 changes: 10 additions & 19 deletions tasks/phylogenetic_inference/task_iqtree2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ task iqtree2 {
Int iqtree2_bootstraps = 1000 # Ultrafast bootstrap replicates
Int alrt = 1000 # SH-like approximate likelihood ratio test (SH-aLRT) replicates
String? iqtree2_opts
Boolean? core_genome

String docker = "us-docker.pkg.dev/general-theiagen/staphb/iqtree2:2.1.2"
Int disk_size = 100
Expand All @@ -22,30 +21,19 @@ task iqtree2 {
# multiple sed statements to get down to a string that is just "version 2.1.2"
iqtree2 --version | grep version | sed 's|.*version|version|;s| COVID-edition for Linux.*||' | tee VERSION

# if iqtree2_model is set by user, use that String input
# check if iqtree2_model input is set and output for sanity
if [ -n "~{iqtree2_model}" ]; then
echo "user provided an iqtree2_model string input, will use this for running iqtree2"
echo "DEBUG: User provided iqtree2_model ~{iqtree2_model}, will use this for running iqtree2"
IQTREE2_MODEL="~{iqtree2_model}"
else
echo "User did not supply an iqtree2_model input, setting based on boolean core_genome"

# if iqtree2_model is NOT set by user, then set iqtree2_model based on boolean core_genome
# if core_genome is set to TRUE, then use model "GTR+G"
if [[ "~{core_genome}" == true ]]; then
echo "core_genome boolean was set to true, so using iqtree2_model GTR+G"
IQTREE2_MODEL="GTR+G"
elif [ "~{core_genome}" == false ]; then
echo "core_genome boolean was set to false, so using iqtree2_model GTR+I+G"
IQTREE2_MODEL="GTR+I+G"
else
echo "iqtree2_model was not specified by user AND core_genome was not specified, so we will use the default setting from iqtree2"
fi
echo "DEBUG: User did not supply an iqtree2_model input, will use iqtree2's model finder"
fi

# sanity check
echo "IQTREE2_MODEL is set to:" ${IQTREE2_MODEL}
echo "DEBUG: IQTREE2_MODEL is set to: " ${IQTREE2_MODEL}

numGenomes=`grep -o '>' ~{alignment} | wc -l`
# make sure there are more than 3 genomes in the dataset
numGenomes=$(grep -o '>' ~{alignment} | wc -l)
if [ "$numGenomes" -gt 3 ]; then
cp ~{alignment} ./msa.fasta

Expand All @@ -56,6 +44,7 @@ task iqtree2 {
# -bb : number of bootstrap replicates
# -alrt : number of replicates to perform SH-like approximate likelihood ratio test
if [[ -v IQTREE2_MODEL ]] ; then # iqtree2 model set; use -m tag
echo "DEBUG: running iqtree2 with the -m flag which is used to provide a model; user-specified " ${IQTREE2_MODEL}
iqtree2 \
-nt AUTO \
-s msa.fasta \
Expand All @@ -67,7 +56,7 @@ task iqtree2 {
echo ${IQTREE2_MODEL} | tee IQTREE2_MODEL.TXT

else # iqtree model is not set; do not use -m tag
echo "running iqtree2 without the -m flag for providing a model. Will default to iqtree2 default; for DNA this is HKY+F"
echo "DEBUG: running iqtree2 without the -m flag which is used to provide a model. Will default to iqtree2 default (Model Finder)"
iqtree2 \
-nt AUTO \
-s msa.fasta \
Expand All @@ -82,6 +71,8 @@ task iqtree2 {

# rename the final output newick file
cp -v msa.fasta.contree ~{cluster_name}_iqtree.nwk
else
echo "DEBUG: not enough genomes provided; more than 3 are required to run iqtree2"
fi
>>>
output {
Expand Down
23 changes: 13 additions & 10 deletions workflows/phylogenetics/wf_snippy_tree.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,34 @@ workflow snippy_tree_wf {
Array[String] samplenames
File reference_genome_file
Boolean use_gubbins = true
Boolean? core_genome
Boolean core_genome = true

String? data_summary_terra_project
String? data_summary_terra_workspace
String? data_summary_terra_table
String? data_summary_column_names # comma delimited
# the following parameters are exposed to allow modification in snippy_streamline
String? snippy_core_docker
Int? snippy_core_cpu
Int? snippy_core_disk_size
Int? snippy_core_memory

Int? gubbins_disk_size
Int? gubbins_memory
Int? gubbins_cpu
String? gubbins_docker

Int? iqtree2_cpu
Int? iqtree2_memory
Int? iqtree2_disk_size
String? iqtree2_opts
String? iqtree2_docker
Int? iqtree2_bootstraps
String? iqtree2_model

String? snp_dists_docker

Int? snp_sites_cpus
Int? snp_sites_disk_size
Int? snp_sites_memory
Expand Down Expand Up @@ -67,10 +74,7 @@ workflow snippy_tree_wf {
cpu = gubbins_cpu
}
}
# select first here is so that the optional boolean input 'core_genome' is coerced into being required
# if user does not specify core_genome or they specify false, this block will be skipped
# if user DOES specify core_genome as true, then the snp_sites task will be called/utilized
if (select_first([core_genome, false])) {
if (core_genome) {
call snp_sites_task.snp_sites as snp_sites {
input:
# hardcoding some of the snp-sites optional outputs to false,
Expand All @@ -92,14 +96,13 @@ workflow snippy_tree_wf {
input:
alignment = select_first([snp_sites.snp_sites_multifasta, gubbins.gubbins_polymorphic_fasta, snippy_core.snippy_full_alignment_clean]),
cluster_name = tree_name,
iqtree2_model = iqtree2_model,
iqtree2_opts = iqtree2_opts,
iqtree2_bootstraps = iqtree2_bootstraps,
docker = iqtree2_docker,
cpu = iqtree2_cpu,
memory = iqtree2_memory,
disk_size = iqtree2_disk_size,
iqtree2_model = iqtree2_model,
core_genome = core_genome,
iqtree2_opts = iqtree2_opts,
iqtree2_bootstraps = iqtree2_bootstraps
disk_size = iqtree2_disk_size
}
call snp_dists_task.snp_dists {
input:
Expand Down

0 comments on commit e018118

Please sign in to comment.