Bs update gatk readme (#8)

* specified gatk version in Readme * Updated wdls to match dsde-pipeline version 2.1.0 * updated link to resource bundle * updated cromwell run version
gatk-workflows · Oct 7, 2020 · cc1bdd5 · cc1bdd5
1 parent adefba2
commit cc1bdd5
Show file tree

Hide file tree

Showing 8 changed files with 38 additions and 32 deletions.
diff --git a/README.md b/README.md
@@ -1,6 +1,8 @@
 # gatk4-genome-processing-pipeline
 Workflows used for germline processing in whole genome sequence data.
 
+*This repo will soon be archived; its new repository location will be [broadinstitute/warp](https://github.com/broadinstitute/warp/tree/develop/pipelines/broad/dna_seq/germline/single_sample/wgs).*
+
 ### WholeGenomeGermlineSingleSample :
 This WDL pipeline implements data pre-processing and initial variant calling (GVCF
 generation) according to the GATK Best Practices (June 2016) for germline SNP and
@@ -24,20 +26,20 @@ Indel discovery in human whole-genome sequencing data.
 
 ### Software version requirements :
 - GATK 4.0.10.1
+  - The HaplotypeCaller call provides the option to use GATK 3, which uses GATK 4.beta.5 for PrintReads and GATK 3.5 for HaplotypeCaller.
 - Picard 2.20.0-SNAPSHOT
 - Samtools 1.3.1
 - Python 2.7
 - Cromwell version support 
-  - Successfully tested on v51
-  - Does not work on versions < v23 due to output syntax
+  - Successfully tested on v53
 
 ### Important Notes :
 - The provided JSON is a generic ready to use example template for the workflow. It is the user’s responsibility to correctly set the reference and resource variables for their own particular test case using the [GATK Tool and Tutorial Documentations](https://gatk.broadinstitute.org/hc/en-us/categories/360002310591).
 - Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
 - For help running workflows on the Google Cloud Platform or locally please
 view the following tutorial [(How to) Execute Workflows from the gatk-workflows Git Organization](https://gatk.broadinstitute.org/hc/en-us/articles/360035530952).
 - Please visit the [User Guide](https://gatk.broadinstitute.org/hc/en-us/categories/360002310591) site for further documentation on our workflows and tools.
-- Relevant reference and resources bundles can be accessed in [Resource Bundle](https://gatk.broadinstitute.org/hc/en-us/articles/360036212652).
+- Relevant reference and resources bundles can be accessed in [Resource Bundle](https://gatk.broadinstitute.org/hc/en-us/articles/360035890811).
 
 ### Contact Us :
 - The following material is provided by the Data Science Platform group at the Broad Institute. Please direct any questions or concerns to one of our forum sites : [GATK](https://gatk.broadinstitute.org/hc/en-us/community/topics) or [Terra](https://support.terra.bio/hc/en-us/community/topics/360000500432).

diff --git a/WholeGenomeGermlineSingleSample.wdl b/WholeGenomeGermlineSingleSample.wdl
@@ -38,7 +38,7 @@ import "./structs/DNASeqStructs.wdl"
 # WORKFLOW DEFINITION
 workflow WholeGenomeGermlineSingleSample {
 
-  String pipeline_version = "2.0"
+  String pipeline_version = "2.1.0"
 
   input {
     SampleAndUnmappedBams sample_and_unmapped_bams
@@ -218,4 +218,7 @@ workflow WholeGenomeGermlineSingleSample {
     File output_vcf = BamToGvcf.output_vcf
     File output_vcf_index = BamToGvcf.output_vcf_index
   }
+  meta {
+    allowNestedInputs: true
+  }
 }
diff --git a/tasks/AggregatedBamQC.wdl b/tasks/AggregatedBamQC.wdl
@@ -106,4 +106,8 @@ input {
     File? fingerprint_summary_metrics = CheckFingerprint.summary_metrics
     File? fingerprint_detail_metrics = CheckFingerprint.detail_metrics
   }
+
+  meta {
+    allowNestedInputs: true
+  }
 }
diff --git a/tasks/Alignment.wdl b/tasks/Alignment.wdl
@@ -17,30 +17,11 @@ version 1.0
 
 import "../structs/DNASeqStructs.wdl"
 
-# Get version of BWA
-task GetBwaVersion {
-  command {
-    # not setting set -o pipefail here because /bwa has a rc=1 and we dont want to allow rc=1 to succeed because
-    # the sed may also fail with that error and that is something we actually want to fail on.
-    /usr/gitc/bwa 2>&1 | \
-    grep -e '^Version' | \
-    sed 's/Version: //'
-  }
-  runtime {
-    docker: "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.4.3-1564508330"
-    memory: "1 GiB"
-  }
-  output {
-    String bwa_version = read_string(stdout())
-  }
-}
-
 # Read unmapped BAM, convert on-the-fly to FASTQ and stream to BWA MEM for alignment, then stream to MergeBamAlignment
 task SamToFastqAndBwaMemAndMba {
   input {
     File input_bam
     String bwa_commandline
-    String bwa_version
     String output_bam_basename
 
     # reference_fasta.ref_alt is the .alt file from bwa-kit
@@ -62,9 +43,21 @@ task SamToFastqAndBwaMemAndMba {
   Int disk_size = ceil(unmapped_bam_size + bwa_ref_size + (disk_multiplier * unmapped_bam_size) + 20)
 
   command <<<
+
+
+    # This is done before "set -o pipefail" because "bwa" will have a rc=1 and we don't want to allow rc=1 to succeed
+    # because the sed may also fail with that error and that is something we actually want to fail on.
+    BWA_VERSION=$(/usr/gitc/bwa 2>&1 | \
+    grep -e '^Version' | \
+    sed 's/Version: //')
+
     set -o pipefail
     set -e
 
+    if [ -z "${BWA_VERSION}" ]; then  # abort when the bwa version lookup above produced nothing; "[" needs surrounding spaces, and quoting keeps the test valid for an empty value
+        exit 1;
+    fi
+
     # set the bash variable needed for the command-line
     bash_ref_fasta=~{reference_fasta.ref_fasta}
     # if reference_fasta.ref_alt has data in it,
@@ -99,7 +92,7 @@ task SamToFastqAndBwaMemAndMba {
         MAX_INSERTIONS_OR_DELETIONS=-1 \
         PRIMARY_ALIGNMENT_STRATEGY=MostDistant \
         PROGRAM_RECORD_ID="bwamem" \
-        PROGRAM_GROUP_VERSION="~{bwa_version}" \
+        PROGRAM_GROUP_VERSION="${BWA_VERSION}" \
         PROGRAM_GROUP_COMMAND_LINE="~{bwa_commandline}" \
         PROGRAM_GROUP_NAME="bwamem" \
         UNMAPPED_READ_STRATEGY=COPY_TO_TAG \

diff --git a/tasks/BamToCram.wdl b/tasks/BamToCram.wdl
@@ -63,5 +63,8 @@ workflow BamToCram {
      File output_cram_md5 = ConvertToCram.output_cram_md5
      File validate_cram_file_report = ValidateCram.report
   }
+  meta {
+    allowNestedInputs: true
+  }
 }
 
diff --git a/tasks/SplitLargeReadGroup.wdl b/tasks/SplitLargeReadGroup.wdl
@@ -26,7 +26,6 @@ workflow SplitLargeReadGroup {
     File input_bam
 
     String bwa_commandline
-    String bwa_version
     String output_bam_basename
 
     # reference_fasta.ref_alt is the .alt file from bwa-kit
@@ -58,7 +57,6 @@ workflow SplitLargeReadGroup {
         bwa_commandline = bwa_commandline,
         output_bam_basename = current_name,
         reference_fasta = reference_fasta,
-        bwa_version = bwa_version,
         compression_level = compression_level,
         preemptible_tries = preemptible_tries,
         hard_clip_reads = hard_clip_reads
@@ -84,4 +82,7 @@ workflow SplitLargeReadGroup {
   output {
     File aligned_bam = GatherMonolithicBamFile.output_bam
   }
+  meta {
+    allowNestedInputs: true
+  }
 }
diff --git a/tasks/UnmappedBamToAlignedBam.wdl b/tasks/UnmappedBamToAlignedBam.wdl
@@ -50,10 +50,6 @@ workflow UnmappedBamToAlignedBam {
 
   Int compression_level = 2
 
-  # Get the version of BWA to include in the PG record in the header of the BAM produced
-  # by MergeBamAlignment.
-  call Alignment.GetBwaVersion
-
   # Get the size of the standard reference files as well as the additional reference files needed for BWA
 
   # Align flowcell-level unmapped input bams in parallel
@@ -78,7 +74,6 @@ workflow UnmappedBamToAlignedBam {
         input:
           input_bam = unmapped_bam,
           bwa_commandline = bwa_commandline,
-          bwa_version = GetBwaVersion.bwa_version,
           output_bam_basename = unmapped_bam_basename + ".aligned.unsorted",
           reference_fasta = references.reference_fasta,
           compression_level = compression_level,
@@ -95,7 +90,6 @@ workflow UnmappedBamToAlignedBam {
           bwa_commandline = bwa_commandline,
           output_bam_basename = unmapped_bam_basename + ".aligned.unsorted",
           reference_fasta = references.reference_fasta,
-          bwa_version = GetBwaVersion.bwa_version,
           compression_level = compression_level,
           preemptible_tries = papi_settings.preemptible_tries,
           hard_clip_reads = hard_clip_reads
@@ -277,4 +271,7 @@ workflow UnmappedBamToAlignedBam {
     File output_bam = GatherBamFiles.output_bam
     File output_bam_index = GatherBamFiles.output_bam_index
   }
+  meta {
+    allowNestedInputs: true
+  }
 }
diff --git a/tasks/VariantCalling.wdl b/tasks/VariantCalling.wdl
@@ -153,6 +153,9 @@ workflow VariantCalling {
     File? bamout = MergeBamouts.output_bam
     File? bamout_index = MergeBamouts.output_bam_index
   }
+  meta {
+    allowNestedInputs: true
+  }
 }
 
 # This task is here because merging bamout files using Picard produces an error.