diff --git a/.gitignore b/.gitignore index 45df67d33..e7ccb8147 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -cromwell* \ No newline at end of file +cromwell* +_LAST +2024* \ No newline at end of file diff --git a/tasks/taxon_id/task_nextclade.wdl b/tasks/taxon_id/task_nextclade.wdl index 6d1043d9e..32e661ba3 100644 --- a/tasks/taxon_id/task_nextclade.wdl +++ b/tasks/taxon_id/task_nextclade.wdl @@ -60,6 +60,72 @@ task nextclade { } } +task nextclade_v3 { + meta { + description: "Nextclade classification of one sample. Leaving optional inputs unspecified will use SARS-CoV-2 defaults." + } + input { + File genome_fasta + File? auspice_reference_tree_json + File? gene_annotations_gff + File? nextclade_pathogen_json + File? input_ref + String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:3.3.1" + String dataset_name + String verbosity = "warn" # other options are: "off" "error" "info" "debug" and "trace" + String dataset_tag + Int disk_size = 50 + Int memory = 4 + Int cpu = 2 + } + String basename = basename(genome_fasta, ".fasta") + command <<< + # track version & print to log + nextclade --version | tee NEXTCLADE_VERSION + + # --reference no longer used in v3. consolidated into --name and --tag + nextclade dataset get \ + --name="~{dataset_name}" \ + --tag="~{dataset_tag}" \ + -o nextclade_dataset_dir \ + --verbosity ~{verbosity} + + # exit script/task upon error + set -e + + # not necessary to include `--jobs ` in v3. Nextclade will use all available CPU threads by default. It's fast so I don't think we will need to change unless we see errors + nextclade run \ + --input-dataset nextclade_dataset_dir/ \ + ~{"--input-ref " + input_ref} \ + ~{"--input-tree " + auspice_reference_tree_json} \ + ~{"--input-pathogen-json " + nextclade_pathogen_json} \ + ~{"--input-annotation " + gene_annotations_gff} \ + --output-json "~{basename}".nextclade.json \ + --output-tsv "~{basename}".nextclade.tsv \ + --output-tree "~{basename}".nextclade.auspice.json \ + --output-all . \ + --verbosity ~{verbosity} \ + "~{genome_fasta}" + >>> + runtime { + docker: "~{docker}" + memory: "~{memory} GB" + cpu: cpu + disks: "local-disk " + disk_size + " SSD" + disk: disk_size + " GB" # TES + dx_instance_type: "mem1_ssd1_v2_x2" + maxRetries: 3 + } + output { + String nextclade_version = read_string("NEXTCLADE_VERSION") + File nextclade_json = "~{basename}.nextclade.json" + File auspice_json = "~{basename}.nextclade.auspice.json" + File nextclade_tsv = "~{basename}.nextclade.tsv" + String nextclade_docker = docker + String nextclade_dataset_tag = "~{dataset_tag}" + } +} + task nextclade_output_parser { meta { description: "Python and bash codeblocks for parsing the output files from Nextclade." @@ -163,52 +229,49 @@ task nextclade_add_ref { } input { File genome_fasta - File? root_sequence File? reference_tree_json - File? qc_config_json + File? nextclade_pathogen_json File? gene_annotations_gff - File? pcr_primers_csv - File? virus_properties - String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:2.14.0" + File? input_ref + String docker = "us-docker.pkg.dev/general-theiagen/nextstrain/nextclade:3.3.1" String dataset_name - String? dataset_reference String? dataset_tag - Int disk_size = 50 - Int memory = 8 + String verbosity = "warn" # other options are: "off" "error" "info" "debug" and "trace" + Int disk_size = 100 + Int memory = 4 Int cpu = 2 } String basename = basename(genome_fasta, ".fasta") command <<< - NEXTCLADE_VERSION="$(nextclade --version)" - echo $NEXTCLADE_VERSION > NEXTCLADE_VERSION + # track version & print to log + nextclade --version | tee NEXTCLADE_VERSION + echo "DEBUG: downloading nextclade dataset..." nextclade dataset get \ --name="~{dataset_name}" \ - ~{"--reference " + dataset_reference} \ ~{"--tag " + dataset_tag} \ -o nextclade_dataset_dir \ - --verbose + --verbosity ~{verbosity} - # If no referece sequence is provided, use the reference tree from the dataset + # If no reference sequence is provided, use the reference tree from the dataset if [ -z "~{reference_tree_json}" ]; then echo "Default dataset reference tree JSON will be used" - cp nextclade_dataset_dir/tree.json reference_tree.json + cp -v nextclade_dataset_dir/tree.json reference_tree.json else echo "User reference tree JSON will be used" - cp ~{reference_tree_json} reference_tree.json + cp -v ~{reference_tree_json} reference_tree.json fi tree_json="reference_tree.json" set -e + echo "DEBUG: running nextclade..." nextclade run \ - --input-dataset=nextclade_dataset_dir/ \ - ~{"--input-root-seq " + root_sequence} \ + --input-dataset nextclade_dataset_dir/ \ --input-tree ${tree_json} \ - ~{"--input-qc-config " + qc_config_json} \ - ~{"--input-gene-map " + gene_annotations_gff} \ - ~{"--input-pcr-primers " + pcr_primers_csv} \ - ~{"--input-virus-properties " + virus_properties} \ + ~{"--input-pathogen-json " + nextclade_pathogen_json} \ + ~{"--input-annotation " + gene_annotations_gff} \ + ~{"--input-ref " + input_ref} \ --output-json "~{basename}".nextclade.json \ --output-tsv "~{basename}".nextclade.tsv \ --output-tree "~{basename}".nextclade.auspice.json \ diff --git a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml index 3b3222019..32ca74e19 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_clearlabs.yml @@ -212,67 +212,58 @@ - path: miniwdl_run/call-ncbi_scrub_se/work/clearlabs_R1_dehosted.fastq.gz - path: miniwdl_run/call-ncbi_scrub_se/work/r1.fastq - path: miniwdl_run/call-ncbi_scrub_se/work/r1.fastq.clean - - path: miniwdl_run/call-nextclade/command - - path: miniwdl_run/call-nextclade/inputs.json + - path: miniwdl_run/call-nextclade_v3/command + - path: miniwdl_run/call-nextclade_v3/inputs.json contains: ["dataset_name", "dataset_tag", "genome_fasta"] - - path: miniwdl_run/call-nextclade/outputs.json + - path: miniwdl_run/call-nextclade_v3/outputs.json contains: ["nextclade", "nextclade_json", "nextclade_version"] - - path: miniwdl_run/call-nextclade/stderr.txt - - path: miniwdl_run/call-nextclade/stderr.txt.offset - - path: miniwdl_run/call-nextclade/stdout.txt - - path: miniwdl_run/call-nextclade/task.log + - path: miniwdl_run/call-nextclade_v3/stderr.txt + - path: miniwdl_run/call-nextclade_v3/stderr.txt.offset + - path: miniwdl_run/call-nextclade_v3/stdout.txt + - path: miniwdl_run/call-nextclade_v3/task.log contains: ["wdl", "theiacov_clearlabs", "nextclade", "done"] - - path: miniwdl_run/call-nextclade/work/NEXTCLADE_VERSION - md5sum: 91a455762183b41af0d8de5596e28e7f - - path: miniwdl_run/call-nextclade/work/_miniwdl_inputs/0/clearlabs.medaka.consensus.fasta + - path: miniwdl_run/call-nextclade_v3/work/NEXTCLADE_VERSION + md5sum: 70aa6879bf9f0e8ba2b9953b0d4a2216 + - path: miniwdl_run/call-nextclade_v3/work/_miniwdl_inputs/0/clearlabs.medaka.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-nextclade/work/clearlabs.medaka.consensus.nextclade.auspice.json - - path: miniwdl_run/call-nextclade/work/clearlabs.medaka.consensus.nextclade.json - - path: miniwdl_run/call-nextclade/work/clearlabs.medaka.consensus.nextclade.tsv - - path: miniwdl_run/call-nextclade/work/nextclade.aligned.fasta + - path: miniwdl_run/call-nextclade_v3/work/clearlabs.medaka.consensus.nextclade.auspice.json + - path: miniwdl_run/call-nextclade_v3/work/clearlabs.medaka.consensus.nextclade.json + - path: miniwdl_run/call-nextclade_v3/work/clearlabs.medaka.consensus.nextclade.tsv + - path: miniwdl_run/call-nextclade_v3/work/nextclade.aligned.fasta md5sum: eb18c508f26125851279f2c03d4a336c - - path: miniwdl_run/call-nextclade/work/nextclade.csv - - path: miniwdl_run/call-nextclade/work/nextclade.errors.csv - md5sum: 2d1dad70d68e56d0a1191900c17061bc - - path: miniwdl_run/call-nextclade/work/nextclade.insertions.csv - md5sum: 3fb6db0807dc663e2821e0bbbccdc5aa - - path: miniwdl_run/call-nextclade/work/nextclade.ndjson - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/genemap.gff - md5sum: b4bd70a3779718e556a17360a41dce90 - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/primers.csv - md5sum: 5990c3483bf66ce607aeb90a44e7ef2e - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/qc.json - md5sum: b01f4491a54941fea12ec5b04a10fb8c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/reference.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.csv + - path: miniwdl_run/call-nextclade_v3/work/nextclade.ndjson + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/genome_annotation.gff3 + md5sum: 4dff84d2d6ada820e0e3a8bc6798d402 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/pathogen.json + md5sum: 9f99ba19333ff907af307611fbb73e21 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/reference.fasta md5sum: c7ce05f28e4ec0322c96f24e064ef55c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/sequences.fasta - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tag.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tree.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/virus_properties.json - md5sum: 03bd2f9d33326299b5b49b4910d84183 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_E.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/sequences.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/tree.json + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.E.fasta md5sum: 14808ad8b34c8bac7de500707400250e - - path: miniwdl_run/call-nextclade/work/nextclade_gene_M.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.M.fasta md5sum: 4799e5af880d2005da56342d6a9d64ab - - path: miniwdl_run/call-nextclade/work/nextclade_gene_N.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.N.fasta md5sum: bbc46cedb153b3213a9cf8f425dd906c - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1a.fasta md5sum: 0c1b1bbcbcfe86d10c466bf63fca5c11 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1b.translation.fasta - md5sum: 23a0497efe0ccffaf51b792f40ca5036 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF3a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1b.fasta + md5sum: bea75a83074a11fa74c316e4df6a3d9f + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF3a.fasta md5sum: 692b2c314c4ff6584a40273dc239cb78 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF6.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF6.fasta md5sum: c1d610f9e45acd3915e40f0d643f0188 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7a.fasta md5sum: a655a6c325b0bc9ad69842fcf2e927a7 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7b.fasta md5sum: 27fd219bb6d18731898a9ddfdee27f67 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF8.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF8.fasta md5sum: 398798980c482562e7c5b21b205e0445 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF9b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF9b.fasta md5sum: 3d6a949bdcecaf70e9d123651a7a7c5e - - path: miniwdl_run/call-nextclade/work/nextclade_gene_S.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.S.fasta md5sum: 0ce44a0a8e2784ca4b3e8d8f03211813 - path: miniwdl_run/call-nextclade_output_parser/command md5sum: f377fb9fc901d440fa35b1b05317a0e1 diff --git a/tests/workflows/theiacov/test_wf_theiacov_fasta.yml b/tests/workflows/theiacov/test_wf_theiacov_fasta.yml index effb1760a..75fc74d4d 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_fasta.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_fasta.yml @@ -36,67 +36,59 @@ - path: miniwdl_run/call-consensus_qc/work/PERCENT_REF_COVERAGE md5sum: 6808ca805661622ad65ae014a4b2a094 - path: miniwdl_run/call-consensus_qc/work/_miniwdl_inputs/0/clearlabs.fasta.gz - - path: miniwdl_run/call-nextclade/command - md5sum: 5574a2eac479f797176a8ecc305b5e0d - - path: miniwdl_run/call-nextclade/inputs.json - - path: miniwdl_run/call-nextclade/outputs.json - - path: miniwdl_run/call-nextclade/stderr.txt - - path: miniwdl_run/call-nextclade/stderr.txt.offset - - path: miniwdl_run/call-nextclade/stdout.txt - - path: miniwdl_run/call-nextclade/task.log - - path: miniwdl_run/call-nextclade/work/NEXTCLADE_VERSION - md5sum: 91a455762183b41af0d8de5596e28e7f - - path: miniwdl_run/call-nextclade/work/_miniwdl_inputs/0/clearlabs.fasta.gz - - path: miniwdl_run/call-nextclade/work/clearlabs.fasta.gz.nextclade.auspice.json - - path: miniwdl_run/call-nextclade/work/clearlabs.fasta.gz.nextclade.json - - path: miniwdl_run/call-nextclade/work/clearlabs.fasta.gz.nextclade.tsv - md5sum: 9b459df2831ab1974c9c8cef5bc5340e - - path: miniwdl_run/call-nextclade/work/nextclade.aligned.fasta + - path: miniwdl_run/call-nextclade_v3/command + md5sum: 2cd5aabef6c3aaa89a503e59a3b6b36c + - path: miniwdl_run/call-nextclade_v3/inputs.json + - path: miniwdl_run/call-nextclade_v3/outputs.json + - path: miniwdl_run/call-nextclade_v3/stderr.txt + - path: miniwdl_run/call-nextclade_v3/stderr.txt.offset + - path: miniwdl_run/call-nextclade_v3/stdout.txt + - path: miniwdl_run/call-nextclade_v3/task.log + - path: miniwdl_run/call-nextclade_v3/work/NEXTCLADE_VERSION + md5sum: 70aa6879bf9f0e8ba2b9953b0d4a2216 + - path: miniwdl_run/call-nextclade_v3/work/_miniwdl_inputs/0/clearlabs.fasta.gz + - path: miniwdl_run/call-nextclade_v3/work/clearlabs.fasta.gz.nextclade.auspice.json + - path: miniwdl_run/call-nextclade_v3/work/clearlabs.fasta.gz.nextclade.json + - path: miniwdl_run/call-nextclade_v3/work/clearlabs.fasta.gz.nextclade.tsv + md5sum: 3aeae954ba64b8ad7db55e08f9c7131c + - path: miniwdl_run/call-nextclade_v3/work/nextclade.aligned.fasta md5sum: bf487271d506418ea23fe30fc033e44d - - path: miniwdl_run/call-nextclade/work/nextclade.csv - md5sum: 2993ec6e17c617b665d0662488e2d657 - - path: miniwdl_run/call-nextclade/work/nextclade.errors.csv - md5sum: 483415f75e782897d83fc1305f793e21 - - path: miniwdl_run/call-nextclade/work/nextclade.insertions.csv - md5sum: eb3f8ec90ecd728a0b03a14171085702 - - path: miniwdl_run/call-nextclade/work/nextclade.ndjson - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/genemap.gff - md5sum: b4bd70a3779718e556a17360a41dce90 - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/primers.csv - md5sum: 5990c3483bf66ce607aeb90a44e7ef2e - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/qc.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/reference.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.csv + md5sum: 50ca5404982b62cbdf077c5d16543e6f + - path: miniwdl_run/call-nextclade_v3/work/nextclade.ndjson + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/genome_annotation.gff3 + md5sum: 4dff84d2d6ada820e0e3a8bc6798d402 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/pathogen.json + md5sum: 9f99ba19333ff907af307611fbb73e21 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/reference.fasta md5sum: c7ce05f28e4ec0322c96f24e064ef55c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/sequences.fasta - md5sum: ea475ab0a62a0a68fc3b1108fdff8a20 - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tag.json - md5sum: 7c080257b26eb528c72858e369fec68c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tree.json - md5sum: 6b227b0aeeda07a187f5879e810879a3 - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/virus_properties.json - - path: miniwdl_run/call-nextclade/work/nextclade_gene_E.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/sequences.fasta + md5sum: c2a4d6cbb837dce22d81f9c36dd0629e + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/tree.json + md5sum: ae2f621cbd025d28389282ed5403fedc + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.E.fasta md5sum: dc43b1e98245a25c142aec52b29a07df - - path: miniwdl_run/call-nextclade/work/nextclade_gene_M.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.M.fasta md5sum: 440de8feeb02bd06878c63319513fb74 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_N.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.N.fasta md5sum: 8a45a3246eb19643cb305a29a3f7f712 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1a.fasta md5sum: b2f11d3bc5674501f9231bfe37e87412 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1b.fasta md5sum: 7597678bc323068f31cd6cc726384903 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF3a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF3a.fasta md5sum: 13a0d63ae18a7975413b54a97cfa0dd5 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF6.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF6.fasta md5sum: ae20a4dae100edd4033cb8af32a4d0bc - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7a.fasta md5sum: 31d03e19799bc52da442217a5e1f1313 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7b.fasta md5sum: 5a505858730e58ac19c052ea2ee84517 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF8.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF8.fasta md5sum: 0e8b98d96c0d75386f08755ea9b3899d - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF9b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF9b.fasta md5sum: fb478d9f262d6a4b3ca5554e4b6654b9 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_S.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.S.fasta md5sum: e630a638abbb2c8ab4a8b74455e9668f - path: miniwdl_run/call-nextclade_output_parser/command md5sum: 0805cf58414b2ffaf3ea5e53f85a5f3d @@ -119,7 +111,7 @@ - path: miniwdl_run/call-nextclade_output_parser/work/_miniwdl_inputs/0/clearlabs.fasta.gz.nextclade.tsv md5sum: d41d8cd98f00b204e9800998ecf8427e - path: miniwdl_run/call-nextclade_output_parser/work/input.tsv - md5sum: 9b459df2831ab1974c9c8cef5bc5340e + md5sum: 3aeae954ba64b8ad7db55e08f9c7131c - path: miniwdl_run/call-pangolin4/command md5sum: b9c36681b77c5e007bf7e890265d70eb - path: miniwdl_run/call-pangolin4/inputs.json diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml index a3a373f45..731542509 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_pe.yml @@ -373,66 +373,57 @@ - path: miniwdl_run/call-pangolin4/work/_miniwdl_inputs/0/SRR13687078.ivar.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e # nextclade - - path: miniwdl_run/call-nextclade/command - md5sum: c8f44c8628e995221576f5593f7230e0 - - path: miniwdl_run/call-nextclade/inputs.json + - path: miniwdl_run/call-nextclade_v3/command + md5sum: 2c430469c2ae6ef311ccd1dd11e6dbe5 + - path: miniwdl_run/call-nextclade_v3/inputs.json contains: ["dataset_name", "dataset_tag", "genome_fasta"] - - path: miniwdl_run/call-nextclade/outputs.json + - path: miniwdl_run/call-nextclade_v3/outputs.json contains: ["nextclade_json", "nextclade_version"] - - path: miniwdl_run/call-nextclade/stderr.txt - - path: miniwdl_run/call-nextclade/stderr.txt.offset - - path: miniwdl_run/call-nextclade/stdout.txt - - path: miniwdl_run/call-nextclade/task.log + - path: miniwdl_run/call-nextclade_v3/stderr.txt + - path: miniwdl_run/call-nextclade_v3/stderr.txt.offset + - path: miniwdl_run/call-nextclade_v3/stdout.txt + - path: miniwdl_run/call-nextclade_v3/task.log contains: ["wdl", "theiacov_illumina_pe", "done"] - - path: miniwdl_run/call-nextclade/work/SRR13687078.ivar.consensus.nextclade.auspice.json - - path: miniwdl_run/call-nextclade/work/SRR13687078.ivar.consensus.nextclade.json - - path: miniwdl_run/call-nextclade/work/SRR13687078.ivar.consensus.nextclade.tsv - - path: miniwdl_run/call-nextclade/work/NEXTCLADE_VERSION - md5sum: 91a455762183b41af0d8de5596e28e7f - - path: miniwdl_run/call-nextclade/work/nextclade.aligned.fasta + - path: miniwdl_run/call-nextclade_v3/work/SRR13687078.ivar.consensus.nextclade.auspice.json + - path: miniwdl_run/call-nextclade_v3/work/SRR13687078.ivar.consensus.nextclade.json + - path: miniwdl_run/call-nextclade_v3/work/SRR13687078.ivar.consensus.nextclade.tsv + - path: miniwdl_run/call-nextclade_v3/work/NEXTCLADE_VERSION + md5sum: 70aa6879bf9f0e8ba2b9953b0d4a2216 + - path: miniwdl_run/call-nextclade_v3/work/nextclade.aligned.fasta md5sum: 59893fd3ef32062d50ded18300024734 - - path: miniwdl_run/call-nextclade/work/nextclade.csv - - path: miniwdl_run/call-nextclade/work/nextclade.errors.csv - md5sum: 2d1dad70d68e56d0a1191900c17061bc - - path: miniwdl_run/call-nextclade/work/nextclade.insertions.csv - md5sum: 3fb6db0807dc663e2821e0bbbccdc5aa - - path: miniwdl_run/call-nextclade/work/nextclade.ndjson - - path: miniwdl_run/call-nextclade/work/nextclade_gene_E.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.csv + - path: miniwdl_run/call-nextclade_v3/work/nextclade.ndjson + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.E.fasta md5sum: b84502318ddddc339eae05c5eb2a1ff8 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_M.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.M.fasta md5sum: 7b4b60c7ed0c3b02be1095913c8a19e0 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_N.translation.fasta - md5sum: 876d9b9fac2be03e1ba791341f3d9481 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1a.translation.fasta - md5sum: 30e34bc765b9eef36412dbfe81d50f2c - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1b.translation.fasta - md5sum: 462b5eab805ef767570968432b08b781 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF3a.translation.fasta - md5sum: 8869418672c467c4708ca5c56f7a7237 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF6.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.N.fasta + md5sum: f7f87c9ad8e6f5193ba1845c15f5a3ff + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1a.fasta + md5sum: 1035b0ef8dd7acb0ad94133d1d59dd4b + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1b.fasta + md5sum: e185f89f9bf273a436b92926c27a6bb6 + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF3a.fasta + md5sum: c9ad81eaae733e400892654e9006632f + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF6.fasta md5sum: c1d610f9e45acd3915e40f0d643f0188 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7a.fasta md5sum: 6a128f382b60376719d94cfaed018844 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7b.fasta md5sum: 80ebf21c9b190354c10ff56bc30fcb7f - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF8.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF8.fasta md5sum: acc50605f4df17e4773be03538ce1fca - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF9b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF9b.fasta md5sum: 0f55e05f70734e109cca95918da37881 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_S.translation.fasta - md5sum: 258ff6d007f516924411a11d5fa17e09 - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/genemap.gff - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/primers.csv - md5sum: 5990c3483bf66ce607aeb90a44e7ef2e - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/qc.json - md5sum: b01f4491a54941fea12ec5b04a10fb8c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/reference.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.S.fasta + md5sum: 77689948a0c14fe6a3e9241453ecb885 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/genome_annotation.gff3 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/reference.fasta md5sum: c7ce05f28e4ec0322c96f24e064ef55c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/sequences.fasta - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tag.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tree.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/virus_properties.json - - path: miniwdl_run/call-nextclade/work/_miniwdl_inputs/0/SRR13687078.ivar.consensus.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/sequences.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/tree.json + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/pathogen.json + - path: miniwdl_run/call-nextclade_v3/work/_miniwdl_inputs/0/SRR13687078.ivar.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e # nextclade output parsing - path: miniwdl_run/call-nextclade_output_parser/command diff --git a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml index 2349a4302..faa653aea 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_illumina_se.yml @@ -324,66 +324,58 @@ - path: miniwdl_run/call-pangolin4/work/_miniwdl_inputs/0/ERR6319327.ivar.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e # nextclade - - path: miniwdl_run/call-nextclade/command - md5sum: 4360b0d82ea182ab01ffe606b3893bd2 - - path: miniwdl_run/call-nextclade/inputs.json + - path: miniwdl_run/call-nextclade_v3/command + md5sum: 9c7d2beb32ea16dc9e893908285fded5 + - path: miniwdl_run/call-nextclade_v3/inputs.json contains: ["dataset_name", "dataset_tag", "genome_fasta"] - - path: miniwdl_run/call-nextclade/outputs.json + - path: miniwdl_run/call-nextclade_v3/outputs.json contains: ["nextclade_json", "nextclade_version"] - - path: miniwdl_run/call-nextclade/stderr.txt - - path: miniwdl_run/call-nextclade/stderr.txt.offset - - path: miniwdl_run/call-nextclade/stdout.txt - - path: miniwdl_run/call-nextclade/task.log + - path: miniwdl_run/call-nextclade_v3/stderr.txt + - path: miniwdl_run/call-nextclade_v3/stderr.txt.offset + - path: miniwdl_run/call-nextclade_v3/stdout.txt + - path: miniwdl_run/call-nextclade_v3/task.log contains: ["wdl", "theiacov_illumina_se", "done"] - - path: miniwdl_run/call-nextclade/work/ERR6319327.ivar.consensus.nextclade.auspice.json - - path: miniwdl_run/call-nextclade/work/ERR6319327.ivar.consensus.nextclade.json - - path: miniwdl_run/call-nextclade/work/ERR6319327.ivar.consensus.nextclade.tsv - - path: miniwdl_run/call-nextclade/work/NEXTCLADE_VERSION - md5sum: 91a455762183b41af0d8de5596e28e7f - - path: miniwdl_run/call-nextclade/work/nextclade.aligned.fasta + - path: miniwdl_run/call-nextclade_v3/work/ERR6319327.ivar.consensus.nextclade.auspice.json + - path: miniwdl_run/call-nextclade_v3/work/ERR6319327.ivar.consensus.nextclade.json + - path: miniwdl_run/call-nextclade_v3/work/ERR6319327.ivar.consensus.nextclade.tsv + - path: miniwdl_run/call-nextclade_v3/work/NEXTCLADE_VERSION + md5sum: 70aa6879bf9f0e8ba2b9953b0d4a2216 + - path: miniwdl_run/call-nextclade_v3/work/nextclade.aligned.fasta md5sum: 2e3156ddefcdf94930edd98bdd6c19c6 - - path: miniwdl_run/call-nextclade/work/nextclade.csv - - path: miniwdl_run/call-nextclade/work/nextclade.errors.csv - md5sum: 2d1dad70d68e56d0a1191900c17061bc - - path: miniwdl_run/call-nextclade/work/nextclade.insertions.csv - md5sum: 3fb6db0807dc663e2821e0bbbccdc5aa - - path: miniwdl_run/call-nextclade/work/nextclade.ndjson - - path: miniwdl_run/call-nextclade/work/nextclade_gene_E.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.csv + - path: miniwdl_run/call-nextclade_v3/work/nextclade.ndjson + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.E.fasta md5sum: b84502318ddddc339eae05c5eb2a1ff8 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_M.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.M.fasta md5sum: 7b4b60c7ed0c3b02be1095913c8a19e0 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_N.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.N.fasta md5sum: 1d98ea23593e865d064f89fc50d327a3 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1a.fasta md5sum: 64222f7837a780c749a1843906536409 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1b.fasta md5sum: 3786323324737ac465ca9b4bea6a7904 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF3a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF3a.fasta md5sum: 0b5e002d3b925c4d35aa7f757ac324e6 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF6.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF6.fasta md5sum: c1d610f9e45acd3915e40f0d643f0188 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7a.fasta md5sum: 54066a55295e54aa71916dd31499e541 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7b.fasta md5sum: 2073dd2f459ad2db27d4c4b05b668155 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF8.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF8.fasta md5sum: fa8708767c9ec6921b8217c07598aa61 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF9b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF9b.fasta md5sum: d3dab52c5ff6ee918f1a6666b93d2588 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_S.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.S.fasta md5sum: 7ea73e4fd00bfd6ce3236bb528cb6e6c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/genemap.gff - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/primers.csv - md5sum: 5990c3483bf66ce607aeb90a44e7ef2e - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/qc.json - md5sum: b01f4491a54941fea12ec5b04a10fb8c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/reference.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/genome_annotation.gff3 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/reference.fasta md5sum: c7ce05f28e4ec0322c96f24e064ef55c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/sequences.fasta - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tag.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tree.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/virus_properties.json - - path: miniwdl_run/call-nextclade/work/_miniwdl_inputs/0/ERR6319327.ivar.consensus.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/sequences.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/tree.json + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/pathogen.json + md5sum: 9f99ba19333ff907af307611fbb73e21 + - path: miniwdl_run/call-nextclade_v3/work/_miniwdl_inputs/0/ERR6319327.ivar.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e # nextclade output parsing - path: miniwdl_run/call-nextclade_output_parser/command diff --git a/tests/workflows/theiacov/test_wf_theiacov_ont.yml b/tests/workflows/theiacov/test_wf_theiacov_ont.yml index 5607c04ad..41e043475 100644 --- a/tests/workflows/theiacov/test_wf_theiacov_ont.yml +++ b/tests/workflows/theiacov/test_wf_theiacov_ont.yml @@ -116,66 +116,57 @@ md5sum: 1684062540bab8897921ed5e40c747cf - path: miniwdl_run/call-consensus_qc/work/_miniwdl_inputs/0/ont.medaka.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-nextclade/command - - path: miniwdl_run/call-nextclade/inputs.json + - path: miniwdl_run/call-nextclade_v3/command + - path: miniwdl_run/call-nextclade_v3/inputs.json contains: ["dataset_name", "dataset_tag", "genome_fasta"] - - path: miniwdl_run/call-nextclade/outputs.json + - path: miniwdl_run/call-nextclade_v3/outputs.json contains: ["nextclade_json", "nextclade_version"] - - path: miniwdl_run/call-nextclade/stderr.txt - - path: miniwdl_run/call-nextclade/stderr.txt.offset - - path: miniwdl_run/call-nextclade/stdout.txt - - path: miniwdl_run/call-nextclade/task.log + - path: miniwdl_run/call-nextclade_v3/stderr.txt + - path: miniwdl_run/call-nextclade_v3/stderr.txt.offset + - path: miniwdl_run/call-nextclade_v3/stdout.txt + - path: miniwdl_run/call-nextclade_v3/task.log contains: ["wdl", "theiacov_ont", "done"] - - path: miniwdl_run/call-nextclade/work/NEXTCLADE_VERSION - md5sum: 91a455762183b41af0d8de5596e28e7f - - path: miniwdl_run/call-nextclade/work/_miniwdl_inputs/0/ont.medaka.consensus.fasta + - path: miniwdl_run/call-nextclade_v3/work/NEXTCLADE_VERSION + md5sum: 70aa6879bf9f0e8ba2b9953b0d4a2216 + - path: miniwdl_run/call-nextclade_v3/work/_miniwdl_inputs/0/ont.medaka.consensus.fasta md5sum: d41d8cd98f00b204e9800998ecf8427e - - path: miniwdl_run/call-nextclade/work/nextclade.aligned.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.aligned.fasta md5sum: 9af2828c3169f789cd1266960c8595da - - path: miniwdl_run/call-nextclade/work/nextclade.csv - - path: miniwdl_run/call-nextclade/work/nextclade.errors.csv - md5sum: 2d1dad70d68e56d0a1191900c17061bc - - path: miniwdl_run/call-nextclade/work/nextclade.insertions.csv - md5sum: 3fb6db0807dc663e2821e0bbbccdc5aa - - path: miniwdl_run/call-nextclade/work/nextclade.ndjson - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/genemap.gff - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/primers.csv - md5sum: 5990c3483bf66ce607aeb90a44e7ef2e - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/qc.json - md5sum: b01f4491a54941fea12ec5b04a10fb8c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/reference.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.csv + - path: miniwdl_run/call-nextclade_v3/work/nextclade.ndjson + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/genome_annotation.gff3 + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/reference.fasta md5sum: c7ce05f28e4ec0322c96f24e064ef55c - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/sequences.fasta - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tag.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/tree.json - - path: miniwdl_run/call-nextclade/work/nextclade_dataset_dir/virus_properties.json - - path: miniwdl_run/call-nextclade/work/nextclade_gene_E.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/sequences.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/tree.json + - path: miniwdl_run/call-nextclade_v3/work/nextclade_dataset_dir/pathogen.json + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.E.fasta md5sum: b84502318ddddc339eae05c5eb2a1ff8 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_M.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.M.fasta md5sum: 7b4b60c7ed0c3b02be1095913c8a19e0 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_N.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.N.fasta md5sum: b07a3ee9b75d9a5e85561e2fed5cccfc - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1a.translation.fasta - md5sum: f73f802b99e28d2dab449ae1ccf7db06 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF1b.translation.fasta - md5sum: a3b8207cd881b1ca88d05cfedf23f1ce - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF3a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1a.fasta + md5sum: 28e0341110fd8c446ea1c4a2c14bcff1 + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF1b.fasta + md5sum: 5fea3fa7473ea78ce7d67840377fe5e8 + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF3a.fasta md5sum: 9a53be681f7d92d46200438d03f8a16b - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF6.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF6.fasta md5sum: c1d610f9e45acd3915e40f0d643f0188 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7a.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7a.fasta md5sum: a209e21c4a9a49649746b39ee449331f - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF7b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF7b.fasta md5sum: 4ba532a9baaf5454f662eb67fa2caa74 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF8.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF8.fasta md5sum: c9b62e72831fa3198ebd28758aca5b29 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_ORF9b.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.ORF9b.fasta md5sum: 0f55e05f70734e109cca95918da37881 - - path: miniwdl_run/call-nextclade/work/nextclade_gene_S.translation.fasta + - path: miniwdl_run/call-nextclade_v3/work/nextclade.cds_translation.S.fasta md5sum: 9efd0dad1c8fc8bd802f20a6d5105bb0 - - path: miniwdl_run/call-nextclade/work/ont.medaka.consensus.nextclade.auspice.json - - path: miniwdl_run/call-nextclade/work/ont.medaka.consensus.nextclade.json - - path: miniwdl_run/call-nextclade/work/ont.medaka.consensus.nextclade.tsv + - path: miniwdl_run/call-nextclade_v3/work/ont.medaka.consensus.nextclade.auspice.json + - path: miniwdl_run/call-nextclade_v3/work/ont.medaka.consensus.nextclade.json + - path: miniwdl_run/call-nextclade_v3/work/ont.medaka.consensus.nextclade.tsv - path: miniwdl_run/call-nextclade_output_parser/command md5sum: 6b2d20d9df7c656a76755a4048acf3ec - path: miniwdl_run/call-nextclade_output_parser/inputs.json diff --git a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl index faf85c48e..f0a2fd1b5 100644 --- a/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl +++ b/workflows/phylogenetics/wf_nextclade_addToRefTree.wdl @@ -8,28 +8,22 @@ workflow nextclade_addToRefTree { description: "Nextclade workflow that adds samples to a curated JSON tree from Augur." } input { - File assembly_fastas - File? root_sequence_fasta + File assembly_fasta + File? input_ref File? gene_annotations_gff File? reference_tree_json - File? qc_config_json - File? pcr_primers_csv - File? virus_properties + File? nextclade_pathogen_json String nextclade_dataset_name - String? dataset_reference String? dataset_tag } call nextclade_analysis.nextclade_add_ref { # nextclade analysis input: - genome_fasta = assembly_fastas, - root_sequence = root_sequence_fasta, + genome_fasta = assembly_fasta, reference_tree_json = reference_tree_json, - qc_config_json = qc_config_json, + nextclade_pathogen_json = nextclade_pathogen_json, gene_annotations_gff = gene_annotations_gff, - pcr_primers_csv = pcr_primers_csv, - virus_properties = virus_properties, + input_ref = input_ref, dataset_name = nextclade_dataset_name, - dataset_reference = dataset_reference, dataset_tag = dataset_tag } call versioning.version_capture { diff --git a/workflows/theiacov/wf_theiacov_clearlabs.wdl b/workflows/theiacov/wf_theiacov_clearlabs.wdl index 3610ac0b7..64c964794 100644 --- a/workflows/theiacov/wf_theiacov_clearlabs.wdl +++ b/workflows/theiacov/wf_theiacov_clearlabs.wdl @@ -31,7 +31,6 @@ workflow theiacov_clearlabs { # reference values File? reference_genome # nextclade inputs - String? nextclade_dataset_reference String? nextclade_dataset_tag String? nextclade_dataset_name # kraken parameters @@ -43,7 +42,6 @@ workflow theiacov_clearlabs { input: organism = organism, reference_genome = reference_genome, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, kraken_target_organism_input = target_organism @@ -117,16 +115,15 @@ workflow theiacov_clearlabs { } if (organism_parameters.standardized_organism == "MPXV" || organism_parameters.standardized_organism == "sars-cov-2") { # tasks specific to either MPXV or sars-cov-2 - call nextclade_task.nextclade { + call nextclade_task.nextclade_v3 { input: genome_fasta = consensus.consensus_seq, dataset_name = organism_parameters.nextclade_dataset_name, - dataset_reference = organism_parameters.nextclade_dataset_reference, dataset_tag = organism_parameters.nextclade_dataset_tag } call nextclade_task.nextclade_output_parser { input: - nextclade_tsv = nextclade.nextclade_tsv, + nextclade_tsv = nextclade_v3.nextclade_tsv, organism = organism } call vadr_task.vadr { @@ -224,11 +221,11 @@ workflow theiacov_clearlabs { String? pangolin_docker = pangolin4.pangolin_docker String? pangolin_versions = pangolin4.pangolin_versions # Nextclade outputs - File? nextclade_json = nextclade.nextclade_json - File? auspice_json = nextclade.auspice_json - File? nextclade_tsv = nextclade.nextclade_tsv - String? nextclade_version = nextclade.nextclade_version - String? nextclade_docker = nextclade.nextclade_docker + File? nextclade_json = nextclade_v3.nextclade_json + File? auspice_json = nextclade_v3.auspice_json + File? nextclade_tsv = nextclade_v3.nextclade_tsv + String? nextclade_version = nextclade_v3.nextclade_version + String? nextclade_docker = nextclade_v3.nextclade_docker String nextclade_ds_tag = organism_parameters.nextclade_dataset_tag String? nextclade_aa_subs = nextclade_output_parser.nextclade_aa_subs String? nextclade_aa_dels = nextclade_output_parser.nextclade_aa_dels diff --git a/workflows/theiacov/wf_theiacov_fasta.wdl b/workflows/theiacov/wf_theiacov_fasta.wdl index 34a8f952a..8ef716a17 100644 --- a/workflows/theiacov/wf_theiacov_fasta.wdl +++ b/workflows/theiacov/wf_theiacov_fasta.wdl @@ -25,7 +25,6 @@ workflow theiacov_fasta { File? reference_genome Int? genome_length # nextclade inputs (default SC2) - String? nextclade_dataset_reference String? nextclade_dataset_tag String? nextclade_dataset_name # sequencing values @@ -54,7 +53,6 @@ workflow theiacov_fasta { flu_subtype = select_first([flu_subtype, abricate_subtype, "N/A"]), reference_genome = reference_genome, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = max_length, @@ -77,16 +75,15 @@ workflow theiacov_fasta { } if (organism_parameters.standardized_organism == "sars-cov-2" || organism_parameters.standardized_organism == "MPXV" || organism_parameters.standardized_organism == "rsv_a" || organism_parameters.standardized_organism == "rsv_b" || organism_parameters.standardized_organism == "flu") { if (organism_parameters.nextclade_dataset_tag != "NA") { - call nextclade_task.nextclade { + call nextclade_task.nextclade_v3 { input: genome_fasta = assembly_fasta, dataset_name = organism_parameters.nextclade_dataset_name, - dataset_reference = organism_parameters.nextclade_dataset_reference, dataset_tag = organism_parameters.nextclade_dataset_tag } call nextclade_task.nextclade_output_parser { input: - nextclade_tsv = nextclade.nextclade_tsv, + nextclade_tsv = nextclade_v3.nextclade_tsv, organism = organism_parameters.standardized_organism } } @@ -141,11 +138,11 @@ workflow theiacov_fasta { String? pangolin_docker = pangolin4.pangolin_docker String? pangolin_versions = pangolin4.pangolin_versions # Nextclade outputs - File? nextclade_json = nextclade.nextclade_json - File? auspice_json = nextclade.auspice_json - File? nextclade_tsv = nextclade.nextclade_tsv - String? nextclade_version = nextclade.nextclade_version - String? nextclade_docker = nextclade.nextclade_docker + File? nextclade_json = nextclade_v3.nextclade_json + File? auspice_json = nextclade_v3.auspice_json + File? nextclade_tsv = nextclade_v3.nextclade_tsv + String? nextclade_version = nextclade_v3.nextclade_version + String? nextclade_docker = nextclade_v3.nextclade_docker String nextclade_ds_tag = organism_parameters.nextclade_dataset_tag String? nextclade_clade = nextclade_output_parser.nextclade_clade String? nextclade_aa_subs = nextclade_output_parser.nextclade_aa_subs diff --git a/workflows/theiacov/wf_theiacov_fasta_batch.wdl b/workflows/theiacov/wf_theiacov_fasta_batch.wdl index f7b13e998..dcabfbdd5 100644 --- a/workflows/theiacov/wf_theiacov_fasta_batch.wdl +++ b/workflows/theiacov/wf_theiacov_fasta_batch.wdl @@ -16,7 +16,6 @@ workflow theiacov_fasta_batch { Array[File] assembly_fastas String organism = "sars-cov-2" # nextclade inputs - String? nextclade_dataset_reference String? nextclade_dataset_tag String? nextclade_dataset_name # pangolin inputs @@ -30,7 +29,6 @@ workflow theiacov_fasta_batch { call set_organism_defaults.organism_parameters { input: organism = organism, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, pangolin_docker_image = pangolin_docker @@ -52,11 +50,10 @@ workflow theiacov_fasta_batch { } if (organism == "MPXV" || organism == "sars-cov-2") { # tasks specific to either MPXV or sars-cov-2 - call nextclade_task.nextclade { + call nextclade_task.nextclade_v3 { input: genome_fasta = cat_files_fasta.concatenated_files, dataset_name = organism_parameters.nextclade_dataset_name, - dataset_reference = organism_parameters.nextclade_dataset_reference, dataset_tag = organism_parameters.nextclade_dataset_tag } } @@ -71,11 +68,11 @@ workflow theiacov_fasta_batch { bucket_name = bucket_name, samplenames = samplenames, organism = organism_parameters.standardized_organism, - nextclade_tsv = nextclade.nextclade_tsv, - nextclade_docker = nextclade.nextclade_docker, - nextclade_version = nextclade.nextclade_version, + nextclade_tsv = nextclade_v3.nextclade_tsv, + nextclade_docker = nextclade_v3.nextclade_docker, + nextclade_version = nextclade_v3.nextclade_version, nextclade_ds_tag = nextclade_dataset_tag, - nextclade_json = nextclade.nextclade_json, + nextclade_json = nextclade_v3.nextclade_json, pango_lineage_report = pangolin4.pango_lineage_report, pangolin_docker = pangolin4.pangolin_docker, theiacov_fasta_analysis_date = version_capture.date, @@ -88,8 +85,8 @@ workflow theiacov_fasta_batch { # Pangolin outputs File? pango_lineage_report = pangolin4.pango_lineage_report # Nextclade outputs - File? nextclade_json = nextclade.nextclade_json - File? nextclade_tsv = nextclade.nextclade_tsv + File? nextclade_json = nextclade_v3.nextclade_json + File? nextclade_tsv = nextclade_v3.nextclade_tsv # Wrangling outputs File datatable = sm_theiacov_fasta_wrangling.terra_table } diff --git a/workflows/theiacov/wf_theiacov_illumina_pe.wdl b/workflows/theiacov/wf_theiacov_illumina_pe.wdl index 3589a8572..22740c449 100644 --- a/workflows/theiacov/wf_theiacov_illumina_pe.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_pe.wdl @@ -46,7 +46,6 @@ workflow theiacov_illumina_pe { Float consensus_min_freq = 0.6 # minimum frequency for a variant to be called as SNP in consensus genome Float variant_min_freq = 0.6 # minimum frequency for a variant to be reported in ivar outputs # nextclade inputs - String? nextclade_dataset_reference String? nextclade_dataset_tag String? nextclade_dataset_name # vadr parameters @@ -75,7 +74,6 @@ workflow theiacov_illumina_pe { reference_genome = reference_genome, gene_locations_bed_file = reference_gene_locations_bed, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -185,7 +183,6 @@ workflow theiacov_illumina_pe { reference_gff_file = reference_gff, reference_genome = reference_genome, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -205,7 +202,6 @@ workflow theiacov_illumina_pe { reference_gff_file = reference_gff, reference_genome = reference_genome, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -244,26 +240,24 @@ workflow theiacov_illumina_pe { # run organism-specific typing if (organism_parameters.standardized_organism == "MPXV" || organism_parameters.standardized_organism == "sars-cov-2" || (organism_parameters.standardized_organism == "flu" && defined(irma.seg_ha_assembly) && ! defined(do_not_run_flu_ha_nextclade))) { # tasks specific to either MPXV, sars-cov-2, or flu - call nextclade_task.nextclade { + call nextclade_task.nextclade_v3 { input: genome_fasta = select_first([irma.seg_ha_assembly, ivar_consensus.assembly_fasta]), dataset_name = select_first([set_flu_ha_nextclade_values.nextclade_dataset_name, organism_parameters.nextclade_dataset_name]), - dataset_reference = select_first([set_flu_ha_nextclade_values.nextclade_dataset_reference, organism_parameters.nextclade_dataset_reference]), dataset_tag = select_first([set_flu_ha_nextclade_values.nextclade_dataset_tag, organism_parameters.nextclade_dataset_tag]) } call nextclade_task.nextclade_output_parser { input: - nextclade_tsv = nextclade.nextclade_tsv, + nextclade_tsv = nextclade_v3.nextclade_tsv, organism = organism_parameters.standardized_organism } } if (organism_parameters.standardized_organism == "flu" && defined(irma.seg_na_assembly) && ! defined(do_not_run_flu_na_nextclade)) { # tasks specific to flu NA - run nextclade a second time - call nextclade_task.nextclade as nextclade_flu_na { + call nextclade_task.nextclade_v3 as nextclade_flu_na { input: genome_fasta = select_first([irma.seg_na_assembly]), dataset_name = select_first([set_flu_na_nextclade_values.nextclade_dataset_name, organism_parameters.nextclade_dataset_name]), - dataset_reference = select_first([set_flu_na_nextclade_values.nextclade_dataset_reference, organism_parameters.nextclade_dataset_reference]), dataset_tag = select_first([set_flu_na_nextclade_values.nextclade_dataset_tag, organism_parameters.nextclade_dataset_tag]) } call nextclade_task.nextclade_output_parser as nextclade_output_parser_flu_na { @@ -448,11 +442,11 @@ workflow theiacov_illumina_pe { String? pangolin_docker = pangolin4.pangolin_docker String? pangolin_versions = pangolin4.pangolin_versions # Nextclade outputs - String nextclade_json = select_first([nextclade.nextclade_json, ""]) - String auspice_json = select_first([ nextclade.auspice_json, ""]) - String nextclade_tsv = select_first([nextclade.nextclade_tsv, ""]) - String nextclade_version = select_first([nextclade.nextclade_version, ""]) - String nextclade_docker = select_first([nextclade.nextclade_docker, ""]) + String nextclade_json = select_first([nextclade_v3.nextclade_json, ""]) + String auspice_json = select_first([ nextclade_v3.auspice_json, ""]) + String nextclade_tsv = select_first([nextclade_v3.nextclade_tsv, ""]) + String nextclade_version = select_first([nextclade_v3.nextclade_version, ""]) + String nextclade_docker = select_first([nextclade_v3.nextclade_docker, ""]) String nextclade_ds_tag = select_first([ha_na_nextclade_ds_tag, set_flu_ha_nextclade_values.nextclade_dataset_tag, organism_parameters.nextclade_dataset_tag, ""]) String nextclade_aa_subs = select_first([ha_na_nextclade_aa_subs, nextclade_output_parser.nextclade_aa_subs, ""]) String nextclade_aa_dels = select_first([ha_na_nextclade_aa_dels, nextclade_output_parser.nextclade_aa_dels, ""]) diff --git a/workflows/theiacov/wf_theiacov_illumina_se.wdl b/workflows/theiacov/wf_theiacov_illumina_se.wdl index 3644d068f..a493babdc 100644 --- a/workflows/theiacov/wf_theiacov_illumina_se.wdl +++ b/workflows/theiacov/wf_theiacov_illumina_se.wdl @@ -31,7 +31,6 @@ workflow theiacov_illumina_se { Int trim_quality_min_score = 30 Int trim_window_size = 4 # nextclade inputs - String? nextclade_dataset_reference String? nextclade_dataset_tag String? nextclade_dataset_name # reference values @@ -67,7 +66,6 @@ workflow theiacov_illumina_se { reference_genome = reference_genome, gene_locations_bed_file = reference_gene_locations_bed, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -155,16 +153,15 @@ workflow theiacov_illumina_se { } if (organism_parameters.standardized_organism == "MPXV" || organism_parameters.standardized_organism == "sars-cov-2") { # tasks specific to either MPXV or sars-cov-2 - call nextclade_task.nextclade { + call nextclade_task.nextclade_v3 { input: genome_fasta = ivar_consensus.assembly_fasta, dataset_name = organism_parameters.nextclade_dataset_name, - dataset_reference = organism_parameters.nextclade_dataset_reference, dataset_tag = organism_parameters.nextclade_dataset_tag } call nextclade_task.nextclade_output_parser { input: - nextclade_tsv = nextclade.nextclade_tsv, + nextclade_tsv = nextclade_v3.nextclade_tsv, organism = organism_parameters.standardized_organism } } @@ -289,11 +286,11 @@ workflow theiacov_illumina_se { String? pangolin_docker = pangolin4.pangolin_docker String? pangolin_versions = pangolin4.pangolin_versions # Nextclade outputs - File? nextclade_json = nextclade.nextclade_json - File? auspice_json = nextclade.auspice_json - File? nextclade_tsv = nextclade.nextclade_tsv - String? nextclade_version = nextclade.nextclade_version - String? nextclade_docker = nextclade.nextclade_docker + File? nextclade_json = nextclade_v3.nextclade_json + File? auspice_json = nextclade_v3.auspice_json + File? nextclade_tsv = nextclade_v3.nextclade_tsv + String? nextclade_version = nextclade_v3.nextclade_version + String? nextclade_docker = nextclade_v3.nextclade_docker String? nextclade_ds_tag = organism_parameters.nextclade_dataset_tag String? nextclade_aa_subs = nextclade_output_parser.nextclade_aa_subs String? nextclade_aa_dels = nextclade_output_parser.nextclade_aa_dels diff --git a/workflows/theiacov/wf_theiacov_ont.wdl b/workflows/theiacov/wf_theiacov_ont.wdl index 946a89b51..9b3e71a66 100644 --- a/workflows/theiacov/wf_theiacov_ont.wdl +++ b/workflows/theiacov/wf_theiacov_ont.wdl @@ -35,7 +35,6 @@ workflow theiacov_ont { Int min_length = 400 Int min_depth = 20 # nextclade inputs - String? nextclade_dataset_reference String? nextclade_dataset_tag String? nextclade_dataset_name # reference values @@ -67,7 +66,6 @@ workflow theiacov_ont { reference_genome = reference_genome, gene_locations_bed_file = reference_gene_locations_bed, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -164,7 +162,6 @@ workflow theiacov_ont { # including these to block from terra reference_genome = reference_genome, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -182,7 +179,6 @@ workflow theiacov_ont { # including these to block from terra reference_genome = reference_genome, genome_length_input = genome_length, - nextclade_dataset_reference_input = nextclade_dataset_reference, nextclade_dataset_tag_input = nextclade_dataset_tag, nextclade_dataset_name_input = nextclade_dataset_name, vadr_max_length = vadr_max_length, @@ -236,26 +232,24 @@ workflow theiacov_ont { # run organism-specific typing if (organism_parameters.standardized_organism == "MPXV" || organism_parameters.standardized_organism == "sars-cov-2" || (organism_parameters.standardized_organism == "flu" && defined(irma.seg_ha_assembly) && ! defined(do_not_run_flu_ha_nextclade))) { # tasks specific to either MPXV, sars-cov-2, or flu - call nextclade_task.nextclade { + call nextclade_task.nextclade_v3 { input: genome_fasta = select_first([irma.seg_ha_assembly, consensus.consensus_seq]), dataset_name = select_first([set_flu_ha_nextclade_values.nextclade_dataset_name, organism_parameters.nextclade_dataset_name]), - dataset_reference = select_first([set_flu_ha_nextclade_values.nextclade_dataset_reference, organism_parameters.nextclade_dataset_reference]), dataset_tag = select_first([set_flu_ha_nextclade_values.nextclade_dataset_tag, organism_parameters.nextclade_dataset_tag]) } call nextclade_task.nextclade_output_parser { input: - nextclade_tsv = nextclade.nextclade_tsv, + nextclade_tsv = nextclade_v3.nextclade_tsv, organism = organism_parameters.standardized_organism } } if (organism_parameters.standardized_organism == "flu" && defined(irma.seg_na_assembly) && ! defined(do_not_run_flu_na_nextclade)) { # tasks specific to flu NA - run nextclade a second time - call nextclade_task.nextclade as nextclade_flu_na { + call nextclade_task.nextclade_v3 as nextclade_flu_na { input: genome_fasta = select_first([irma.seg_na_assembly]), dataset_name = select_first([set_flu_na_nextclade_values.nextclade_dataset_name, organism_parameters.nextclade_dataset_name]), - dataset_reference = select_first([set_flu_na_nextclade_values.nextclade_dataset_reference, organism_parameters.nextclade_dataset_reference]), dataset_tag = select_first([set_flu_na_nextclade_values.nextclade_dataset_tag, organism_parameters.nextclade_dataset_tag]) } call nextclade_task.nextclade_output_parser as nextclade_output_parser_flu_na { @@ -409,11 +403,11 @@ workflow theiacov_ont { String? pangolin_docker = pangolin4.pangolin_docker String? pangolin_versions = pangolin4.pangolin_versions # Nextclade outputs - String nextclade_json = select_first([nextclade.nextclade_json, ""]) - String auspice_json = select_first([ nextclade.auspice_json, ""]) - String nextclade_tsv = select_first([nextclade.nextclade_tsv, ""]) - String nextclade_version = select_first([nextclade.nextclade_version, ""]) - String nextclade_docker = select_first([nextclade.nextclade_docker, ""]) + String nextclade_json = select_first([nextclade_v3.nextclade_json, ""]) + String auspice_json = select_first([ nextclade_v3.auspice_json, ""]) + String nextclade_tsv = select_first([nextclade_v3.nextclade_tsv, ""]) + String nextclade_version = select_first([nextclade_v3.nextclade_version, ""]) + String nextclade_docker = select_first([nextclade_v3.nextclade_docker, ""]) String nextclade_ds_tag = select_first([ha_na_nextclade_ds_tag, set_flu_ha_nextclade_values.nextclade_dataset_tag, organism_parameters.nextclade_dataset_tag, ""]) String nextclade_aa_subs = select_first([ha_na_nextclade_aa_subs, nextclade_output_parser.nextclade_aa_subs, ""]) String nextclade_aa_dels = select_first([ha_na_nextclade_aa_dels, nextclade_output_parser.nextclade_aa_dels, ""]) diff --git a/workflows/utilities/wf_organism_parameters.wdl b/workflows/utilities/wf_organism_parameters.wdl index 1dd7db055..42edd6445 100644 --- a/workflows/utilities/wf_organism_parameters.wdl +++ b/workflows/utilities/wf_organism_parameters.wdl @@ -22,7 +22,6 @@ workflow organism_parameters { Int? genome_length_input # set default nextclade information as NA - String? nextclade_dataset_reference_input String? nextclade_dataset_tag_input String? nextclade_dataset_name_input @@ -41,9 +40,8 @@ workflow organism_parameters { String sc2_org_name = "sars-cov-2" String sc2_reference_genome = "gs://theiagen-public-files-rp/terra/augur-sars-cov-2-references/MN908947.fasta" String sc2_gene_locations_bed = "gs://theiagen-public-files-rp/terra/sars-cov-2-files/sc2_gene_locations.bed" - String sc2_nextclade_ds_tag = "2023-12-03T12:00:00Z" - String sc2_nextclade_ref = "MN908947" - String sc2_nextclade_ds_name = "sars-cov-2" + String sc2_nextclade_ds_tag = "2024-02-16--04-00-32Z" + String sc2_nextclade_ds_name = "nextstrain/sars-cov-2/wuhan-hu-1/orfs" String sc2_pangolin_docker = "us-docker.pkg.dev/general-theiagen/staphb/pangolin:4.3.1-pdata-1.26" Int sc2_genome_len = 29903 Int sc2_vadr_max_length = 30000 @@ -54,9 +52,8 @@ workflow organism_parameters { String mpox_org_name = "MPXV" String mpox_reference_genome = "gs://theiagen-public-files/terra/mpxv-files/MPXV.MT903345.reference.fasta" String mpox_gene_locations_bed = "gs://theiagen-public-files/terra/mpxv-files/mpox_gene_locations.bed" - String mpox_nextclade_ds_tag = "2023-08-01T12:00:00Z" - String mpox_nextclade_ref = "pseudo_ON563414" - String mpox_nextclade_ds_name = "hMPXV_B1" + String mpox_nextclade_ds_tag = "2024-01-16--20-31-02Z" + String mpox_nextclade_ds_name = "nextstrain/mpox/lineage-b.1" String mpox_kraken_target_organism = "Monkeypox virus" String mpox_primer_bed_file = "gs://theiagen-public-files/terra/mpxv-files/MPXV.primer.bed" String mpox_reference_gff_file = "gs://theiagen-public-files/terra/mpxv-files/Mpox-MT903345.1.reference.gff3" @@ -75,7 +72,6 @@ workflow organism_parameters { Int wnv_vadr_max_length = 11000 Int wnv_vadr_memory = 8 String wnv_nextclade_ds_tag = "NA" - String wnv_nextclade_ref = "NA" String wnv_nextclade_ds_name = "NA" } if (organism == "flu" || organism == "influenza" || organism == "Flu" || organism == "Influenza") { @@ -91,62 +87,53 @@ workflow organism_parameters { if (flu_segment == "HA") { if (flu_subtype == "H1N1") { String h1n1_ha_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_h1n1pdm_ha.fasta" - String h1n1_ha_nextclade_ds_tag = "2023-11-18T12:00:00Z" - String h1n1_ha_nextclade_ds_name = "flu_h1n1pdm_ha" - String h1n1_ha_nextclade_ref = "MW626062" + String h1n1_ha_nextclade_ds_tag = "2024-01-16--20-31-02Z" + String h1n1_ha_nextclade_ds_name = "nextstrain/flu/h1n1pdm/ha/MW626062" } if (flu_subtype == "H3N2") { String h3n2_ha_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_h3n2_ha.fasta" - String h3n2_ha_nextclade_ds_tag = "2023-11-18T12:00:00Z" - String h3n2_ha_nextclade_ds_name = "flu_h3n2_ha" - String h3n2_ha_nextclade_ref = "EPI1857216" + String h3n2_ha_nextclade_ds_tag = "2024-02-22--16-12-03Z" + String h3n2_ha_nextclade_ds_name = "nextstrain/flu/h3n2/ha/EPI1857216" } if (flu_subtype == "Victoria") { String vic_ha_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_vic_ha.fasta" - String vic_ha_nextclade_ds_tag = "2023-11-22T12:00:00Z" - String vic_ha_nextclade_ds_name = "flu_vic_ha" - String vic_ha_nextclade_ref = "KX058884" + String vic_ha_nextclade_ds_tag = "2024-01-16--20-31-02Z" + String vic_ha_nextclade_ds_name = "nextstrain/flu/vic/ha/KX058884" } if (flu_subtype == "Yamagata") { String yam_ha_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_yam_ha.fasta" - String yam_ha_nextclade_ds_tag = "2023-11-18T12:00:00Z" - String yam_ha_nextclade_ds_name = "flu_yam_ha" - String yam_ha_nextclade_ref = "JN993010" + String yam_ha_nextclade_ds_tag = "2024-01-30--16-34-55Z" + String yam_ha_nextclade_ds_name = "nextstrain/flu/yam/ha/JN993010" } } if (flu_segment == "NA") { if (flu_subtype == "H1N1") { String h1n1_na_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_h1n1pdm_na.fasta" - String h1n1_na_nextclade_ds_tag = "2023-11-18T12:00:00Z" - String h1n1_na_nextclade_ds_name = "flu_h1n1pdm_na" - String h1n1_na_nextclade_ref = "MW626056" + String h1n1_na_nextclade_ds_tag = "2024-01-16--20-31-02Z" + String h1n1_na_nextclade_ds_name = "nextstrain/flu/h1n1pdm/na/MW626056" } if (flu_subtype == "H3N2") { String h3n2_na_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_h3n2_na.fasta" - String h3n2_na_nextclade_ds_tag = "2023-11-18T12:00:00Z" - String h3n2_na_nextclade_ds_name = "flu_h3n2_na" - String h3n2_na_nextclade_ref = "EPI1857215" + String h3n2_na_nextclade_ds_tag = "2024-01-16--20-31-02Z" + String h3n2_na_nextclade_ds_name = "nextstrain/flu/h3n2/na/EPI1857215" } if (flu_subtype == "Victoria") { String vic_na_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_vic_na.fasta" - String vic_na_nextclade_ds_tag = "2023-11-18T12:00:00Z" - String vic_na_nextclade_ds_name = "flu_vic_na" - String vic_na_nextclade_ref = "CY073894" + String vic_na_nextclade_ds_tag = "2024-01-16--20-31-02Z" + String vic_na_nextclade_ds_name = "nextstrain/flu/vic/na/CY073894" } if (flu_subtype == "Yamagata") { String yam_na_reference = "gs://theiagen-public-files-rp/terra/flu-references/reference_yam_na.fasta" String yam_na_nextclade_ds_tag = "NA" String yam_na_nextclade_ds_name = "NA" - String yam_na_nextclade_ref = "NA" } } } if (organism == "rsv_a" || organism == "rsv-a" || organism == "RSV-A" || organism == "RSV_A") { String rsv_a_org_name = "rsv_a" String rsv_a_reference_genome = "gs://theiagen-public-files-rp/terra/rsv_references/reference_rsv_a.fasta" - String rsv_a_nextclade_ds_tag = "2023-02-03T12:00:00Z" - String rsv_a_nextclade_ref = "EPI_ISL_412866" - String rsv_a_nextclade_ds_name = "rsv_a" + String rsv_a_nextclade_ds_tag = "2024-01-29--10-29-43Z" + String rsv_a_nextclade_ds_name = "nextstrain/rsv/a/EPI_ISL_412866" Int rsv_a_genome_len = 16000 String rsv_a_vadr_options = "-r --mkey rsv --xnocomp" Int rsv_a_vadr_max_length = 15500 @@ -155,9 +142,8 @@ workflow organism_parameters { if (organism == "rsv_b" || organism == "rsv-b" || organism == "RSV-B" || organism == "RSV_B") { String rsv_b_org_name = "rsv_b" String rsv_b_reference_genome = "gs://theiagen-public-files-rp/terra/rsv_references/reference_rsv_b.fasta" - String rsv_b_nextclade_ds_tag = "2023-02-03T12:00:00Z" - String rsv_b_nextclade_ref = "EPI_ISL_1653999" - String rsv_b_nextclade_ds_name = "rsv_b" + String rsv_b_nextclade_ds_tag = "2024-01-29--10-29-43Z" + String rsv_b_nextclade_ds_name = "nextstrain/rsv/b/EPI_ISL_1653999" Int rsv_b_genome_len = 16000 String rsv_b_vadr_options = "-r --mkey rsv --xnocomp" Int rsv_b_vadr_max_length = 15500 @@ -191,7 +177,6 @@ workflow organism_parameters { Int genome_length = select_first([genome_length_input, sc2_genome_len, mpox_genome_len, wnv_genome_len, flu_genome_len, rsv_a_genome_len, rsv_b_genome_len, hiv_v1_genome_len, hiv_v2_genome_len]) # nextclade information String nextclade_dataset_tag = select_first([nextclade_dataset_tag_input, sc2_nextclade_ds_tag, mpox_nextclade_ds_tag, wnv_nextclade_ds_tag, h1n1_ha_nextclade_ds_tag, h3n2_ha_nextclade_ds_tag, vic_ha_nextclade_ds_tag, yam_ha_nextclade_ds_tag, h1n1_na_nextclade_ds_tag, h3n2_na_nextclade_ds_tag, vic_na_nextclade_ds_tag, yam_na_nextclade_ds_tag, rsv_a_nextclade_ds_tag, rsv_b_nextclade_ds_tag, "NA"]) - String nextclade_dataset_reference = select_first([nextclade_dataset_reference_input, sc2_nextclade_ref, mpox_nextclade_ref, wnv_nextclade_ref, h1n1_ha_nextclade_ref, h3n2_ha_nextclade_ref, vic_ha_nextclade_ref, yam_ha_nextclade_ref, h1n1_na_nextclade_ref, h3n2_na_nextclade_ref, vic_na_nextclade_ref, yam_na_nextclade_ref, rsv_a_nextclade_ref, rsv_b_nextclade_ref, "NA"]) String nextclade_dataset_name = select_first([nextclade_dataset_name_input, sc2_nextclade_ds_name, mpox_nextclade_ds_name, wnv_nextclade_ds_name, h1n1_ha_nextclade_ds_name, h3n2_ha_nextclade_ds_name, vic_ha_nextclade_ds_name, yam_ha_nextclade_ds_name, h1n1_na_nextclade_ds_name, h3n2_na_nextclade_ds_name, vic_na_nextclade_ds_name, yam_na_nextclade_ds_name, rsv_a_nextclade_ds_name, rsv_b_nextclade_ds_name, "NA"]) # pangolin options String pangolin_docker = select_first([pangolin_docker_image, sc2_pangolin_docker, ""])