Skip to content

Commit

Permalink
refactor: determine taxaID, fast ani ica CDCgov#149
Browse files Browse the repository at this point in the history
  • Loading branch information
slsevilla committed Apr 3, 2024
1 parent 5ab72ca commit fd2463a
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 35 deletions.
9 changes: 3 additions & 6 deletions modules/local/determine_taxa_id.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,17 @@ process DETERMINE_TAXA_ID {
path("versions.yml") , emit: versions

script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/
// Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory.
if (params.ica==false) { ica = "" }
else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" }
else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." }
// define variables
def prefix = task.ext.prefix ?: "${meta.id}"
// -r needs to be last: with -entry SCAFFOLDS/CDC_SCAFFOLDS, k2_bh_summary is not passed, so it expands to a blank argument
def k2_bh_file = k2_bh_summary ? "-r $k2_bh_summary" : ""
def container_version = "base_v2.1.0"
def container = task.container.toString() - "quay.io/jvhagey/phoenix@"
def script = params.ica ? "${params.ica_path}/determine_taxID.sh" : "determine_taxID.sh"
"""
${ica}determine_taxID.sh -k $kraken_weighted -s $meta.id -f $formatted_ani_file -d $nodes_file -m $names_file $k2_bh_file
${script} -k $kraken_weighted -s $meta.id -f $formatted_ani_file -d $nodes_file -m $names_file $k2_bh_file
script_version=\$(${ica}determine_taxID.sh -V)
script_version=\$(${script} -V)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
14 changes: 4 additions & 10 deletions modules/local/format_ANI_best_hit.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,12 @@ process FORMAT_ANI {
path("versions.yml"), emit: versions

script: // This script is bundled with the pipeline, in cdcgov/phoenix/bin/
// terra=true sets paths for bc/wget for terra container paths
if (params.terra==false) { terra = ""}
else if (params.terra==true) { terra = "-t terra" }
else { error "Please set params.terra to either \"true\" or \"false\"" }
// Adding if/else for if running on ICA it is a requirement to state where the script is, however, this causes CLI users to not run the pipeline from any directory.
if (params.ica==false) { ica = "" }
else if (params.ica==true) { ica = "bash ${workflow.launchDir}/bin/" }
else { error "Please set params.ica to either \"true\" if running on ICA or \"false\" for all other methods." }
// define variables
def prefix = task.ext.prefix ?: "${meta.id}"
def container_version = "base_v2.1.0"
def container = task.container.toString() - "quay.io/jvhagey/phoenix@"
def script = params.ica ? "${params.ica_path}/ANI_best_hit_formatter.sh" : "ANI_best_hit_formatter.sh"
def terra = params.terra ? "-t terra" : ""
"""
line=\$(head -n1 ${ani_file})
if [[ "\${line}" == "Mash/FastANI Error:"* ]]; then
Expand All @@ -35,10 +29,10 @@ process FORMAT_ANI {
db_version="REFSEQ_unknown"
fi
# script also checks that match is 80 or > otherwise an error is thrown
${ica}ANI_best_hit_formatter.sh -a ${ani_file} -n ${prefix} -d \${db_version} ${terra}
${script} -a ${ani_file} -n ${prefix} -d \${db_version} ${terra}
fi
script_version=\$(${ica}ANI_best_hit_formatter.sh -V)
script_version=\$(${script} -V)
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
38 changes: 19 additions & 19 deletions workflows/phoenix.nf
Original file line number Diff line number Diff line change
Expand Up @@ -296,28 +296,28 @@ workflow PHOENIX_EXTERNAL {
.join(DETERMINE_TOP_MASH_HITS.out.top_taxa_list.map{ meta, top_taxa_list -> [[id:meta.id], top_taxa_list ]}, by: [0])\
.join(DETERMINE_TOP_MASH_HITS.out.reference_dir.map{ meta, reference_dir -> [[id:meta.id], reference_dir ]}, by: [0])

// // Getting species ID
// FASTANI (
// top_taxa_list_ch
// )
// ch_versions = ch_versions.mix(FASTANI.out.versions)
// Getting species ID
FASTANI (
top_taxa_list_ch
)
ch_versions = ch_versions.mix(FASTANI.out.versions)

// // Reformat ANI headers
// FORMAT_ANI (
// FASTANI.out.ani
// )
// ch_versions = ch_versions.mix(FORMAT_ANI.out.versions)
// Reformat ANI headers
FORMAT_ANI (
FASTANI.out.ani
)
ch_versions = ch_versions.mix(FORMAT_ANI.out.versions)

// // Combining weighted kraken report with the FastANI hit based on meta.id
// best_hit_ch = KRAKEN2_WTASMBLD.out.k2_bh_summary.map{meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}\
// .join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\
// .join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0])
// Combining weighted kraken report with the FastANI hit based on meta.id
best_hit_ch = KRAKEN2_WTASMBLD.out.k2_bh_summary.map{meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary]}\
.join(FORMAT_ANI.out.ani_best_hit.map{ meta, ani_best_hit -> [[id:meta.id], ani_best_hit ]}, by: [0])\
.join(KRAKEN2_TRIMD.out.k2_bh_summary.map{ meta, k2_bh_summary -> [[id:meta.id], k2_bh_summary ]}, by: [0])

// // Getting ID from either FastANI or if fails, from Kraken2
// DETERMINE_TAXA_ID (
// best_hit_ch, params.nodes, params.names
// )
// ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions)
// Getting ID from either FastANI or if fails, from Kraken2
DETERMINE_TAXA_ID (
best_hit_ch, params.nodes, params.names
)
ch_versions = ch_versions.mix(DETERMINE_TAXA_ID.out.versions)

// // Perform MLST steps on isolates (with srst2 on internal samples)
// DO_MLST (
Expand Down

0 comments on commit fd2463a

Please sign in to comment.