From f2300a9a5e1729cb5219cc5c7295e61d8035dc3b Mon Sep 17 00:00:00 2001
From: Nikhil Kumar <nikhilkumar516@gmail.com>
Date: Tue, 11 Apr 2023 10:37:18 -0400
Subject: [PATCH 1/3] Allow empty output from facets

---
 cwl/facets-workflow.cwl | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/cwl/facets-workflow.cwl b/cwl/facets-workflow.cwl
index ce3ab28..0811a7a 100644
--- a/cwl/facets-workflow.cwl
+++ b/cwl/facets-workflow.cwl
@@ -152,7 +152,7 @@ steps:
         purity_png:
           type:
             type: array
-            items: [File]
+            items: ['null', File]
         purity_seg:
           type:
             type: array
@@ -160,7 +160,7 @@ steps:
         hisens_png:
           type:
             type: array
-            items: [File]
+            items: ['null', File]
         hisens_seg:
           type:
             type: array
@@ -281,7 +281,7 @@ steps:
             purity_png:
               type:
                 type: array
-                items: [File]
+                items: ['null', File]
             purity_seg:
               type:
                 type: array
@@ -289,7 +289,7 @@ steps:
             hisens_png:
               type:
                 type: array
-                items: [File]
+                items: ['null', File]
             hisens_seg:
               type:
                 type: array
@@ -338,7 +338,7 @@ steps:
             purity_png:
               type:
                 type: array
-                items: [File]
+                items: ['null', File]
             purity_seg:
               type:
                 type: array
@@ -346,7 +346,7 @@ steps:
             hisens_png:
               type:
                 type: array
-                items: [File]
+                items: ['null', File]
             hisens_seg:
               type:
                 type: array

From 7929448ddcb3a9f39506a9ca4882a0127d663cf3 Mon Sep 17 00:00:00 2001
From: Nikhil Kumar <nikhilkumar516@gmail.com>
Date: Tue, 11 Apr 2023 10:37:41 -0400
Subject: [PATCH 2/3] Make FacetsPair file fields optional and re-indent types.yml

---
 cwl/types.yml | 563 +++++++++++++++++++++++++-------------------------
 1 file changed, 280 insertions(+), 283 deletions(-)

diff --git a/cwl/types.yml b/cwl/types.yml
index 8fc277e..4bec47a 100644
--- a/cwl/types.yml
+++ b/cwl/types.yml
@@ -1,301 +1,298 @@
 class: SchemaDefRequirement
 types:
+  - name: FilloutMafOptionalSample
+    doc: A tumor sample record for variant fillout that might lack a .maf file but HAS a .bam file
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: maf_file
+        type: File?
+        doc: an optional input .maf file
+      - name: bam_file
+        doc: sample's alignment file
+        type: File
+        secondaryFiles:
+          - .bai # Sample.bam.bai
 
-- name: FilloutMafOptionalSample
-  doc: A tumor sample record for variant fillout that might lack a .maf file but HAS a .bam file
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: maf_file
-      type: File?
-      doc: an optional input .maf file
-    - name: bam_file
-      doc: sample's alignment file
-      type: File
-      secondaryFiles:
-        - .bai # Sample.bam.bai
+  - name: FilloutNoMafsample
+    doc: A tumor sample record for variant fillout that does NOT have a .maf file
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: bam_file
+        doc: sample's alignment file
+        type: File
+        secondaryFiles:
+          - .bai # Sample.bam.bai
+      # These files get added during downstream processing
+      - name: unfiltered_vcf
+        type: File?
+        doc: fillout vcf file produced for the sample with no filtering applied
+      - name: filtered_vcf
+        type: File?
+        doc: fillout vcf that has had extra filters applied such as germline filtering
+      - name: unfiltered_maf
+        type: File?
+        doc: fillout maf file produced for the sample
+      - name: filtered_maf
+        type: File?
+        doc: fillout maf that has had extra filters applied such as germline filtering
 
-- name: FilloutNoMafsample
-  doc: A tumor sample record for variant fillout does NOT have a .maf file
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: bam_file
-      doc: sample's alignment file
-      type: File
-      secondaryFiles:
-        - .bai # Sample.bam.bai
-    # These files get added during downstream processing
-    - name: unfiltered_vcf
-      type: File?
-      doc: fillout vcf file produced for the sample with no filtering applied
-    - name: filtered_vcf
-      type: File?
-      doc: fillout vcf that has had extra filters applied such as germline filtering
-    - name: unfiltered_maf
-      type: File?
-      doc: fillout maf file produced for the sample
-    - name: filtered_maf
-      type: File?
-      doc: fillout maf that has had extra filters applied such as germline filtering
+  - name: FilloutSample
+    doc: A tumor sample record to be used for variant fillout which MUST have a .maf file and indexed .bam file
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: maf_file
+        type: File
+        doc: the sample's input .maf file
+      - name: bam_file
+        doc: sample's alignment file
+        type: File
+        secondaryFiles:
+          - .bai # Sample.bam.bai
+          # - ^.bai # Sample.bai
+          # NOTE: need to figure out how to make .bai and .bam.bai both work at once;
+          # if I add them as separate entries here then the CWL requires that BOTH be present but we only have one
+      # These files get added during downstream processing
+      - name: unfiltered_vcf
+        type: File?
+        doc: fillout vcf file produced for the sample with no filtering applied
+      - name: filtered_vcf
+        type: File?
+        doc: fillout vcf that has had extra filters applied such as germline filtering
+      - name: unfiltered_maf
+        type: File?
+        doc: fillout maf file produced for the sample
+      - name: filtered_maf
+        type: File?
+        doc: fillout maf that has had extra filters applied such as germline filtering
 
-- name: FilloutSample
-  doc: A tumor sample record to be used for variant fillout which MUST have a .maf file and indexed .bam file
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: maf_file
-      type: File
-      doc: the sample's input .maf file
-    - name: bam_file
-      doc: sample's alignment file
-      type: File
-      secondaryFiles:
-        - .bai # Sample.bam.bai
-        # - ^.bai # Sample.bai
-        # NOTE: need to figure out how to make .bai and .bam.bai both work at once;
-        # if I add them as separate entries here then the CWL requires that BOTH be present but we only have one
-    # These files get added during downstream processing
-    - name: unfiltered_vcf
-      type: File?
-      doc: fillout vcf file produced for the sample with no filtering applied
-    - name: filtered_vcf
-      type: File?
-      doc: fillout vcf that has had extra filters applied such as germline filtering
-    - name: unfiltered_maf
-      type: File?
-      doc: fillout maf file produced for the sample
-    - name: filtered_maf
-      type: File?
-      doc: fillout maf that has had extra filters applied such as germline filtering
+  # TODO: rename this to "FilloutNoIndexSample"
+  - name: FilloutIndexSample
+    doc: A FilloutSample that needs .bam indexing and prefiltering applied
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: prefilter
+        type: boolean
+        doc: if the sample maf file needs to be pre-filtered (true) or not (false)
+      - name: maf_file
+        type: File
+        doc: the sample's input .maf file
+      - name: bam_file
+        type: File
+        doc: bam file that needs a .bai file generated
 
-# TODO: rename this to "FilloutNoIndexSample"
-- name: FilloutIndexSample
-  doc: A FilloutSample needs .bam indexing and prefiltering applied
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: prefilter
-      type: boolean
-      doc: if the sample maf file needs to be pre-filtered (true) or not (false)
-    - name: maf_file
-      type: File
-      doc: the sample's input .maf file
-    - name: bam_file
-      type: File
-      doc: bam file that needs a .bai file generated
+  - name: FilloutMafOptionalNoIndexSample
+    doc: A tumor sample record for variant fillout that might lack a .maf file but HAS a .bam file without a .bai index file
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: maf_file
+        type: File?
+        doc: an optional input .maf file
+      - name: bam_file
+        doc: sample's alignment file
+        type: File
 
-- name: FilloutMafOptionalNoIndexSample
-  doc: A tumor sample record for variant fillout that might lack a .maf file but HAS a .bam file without a .bai index file
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: maf_file
-      type: File?
-      doc: an optional input .maf file
-    - name: bam_file
-      doc: sample's alignment file
-      type: File
-
-- name: FilloutMafOptionalIndexedSample
-  doc: A tumor sample record for variant fillout that might lack a .maf file but HAS a .bam file and a .bai index file
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: maf_file
-      type: File?
-      doc: an optional input .maf file
-    - name: bam_file
-      doc: sample's alignment file
-      type: File
-      secondaryFiles:
-        - .bai
-
-- name: FilloutIndexedSample
-  doc: A fillout sample that HAS a pre-filtered .maf file and HAS an indexed .bam file
-  type: record
-  fields:
-    - name: sample_id
-      type: string
-      doc: sample identifier must match the ID used inside the maf file
-    - name: normal_id
-      type: string
-    - name: sample_type
-      type: string
-      doc: should be one of "research" or "clinical"
-    - name: prefilter
-      type: boolean
-      doc: if the sample maf file needs to be pre-filtered (true) or not (false)
-    - name: maf_file
-      type: File
-      doc: the sample's input .maf file
-    - name: bam_file
-      type: File
-      doc: bam file that needs a .bai file generated
-      secondaryFiles:
+  - name: FilloutMafOptionalIndexedSample
+    doc: A tumor sample record for variant fillout that might lack a .maf file but HAS a .bam file and a .bai index file
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: maf_file
+        type: File?
+        doc: an optional input .maf file
+      - name: bam_file
+        doc: sample's alignment file
+        type: File
+        secondaryFiles:
           - .bai
 
-- name: TNMafPileupPair
-  doc: a tumor normal sample pair with .maf and snp pileup files
-  type: record
-  fields:
-    - name: tumor_id
-      type: string
-    - name: normal_id
-      type: string
-    - name: pair_id
-      type: string
-    - name: snp_pileup
-      type: File
-    - name: pair_maf
-      type: File
-
-- name: TMBInputPair
-  doc: a tumor normal pair for input to Tumor Mutation Burden analysis
-  type: record
-  fields:
-    - name: tumor_id
-      type: string
-    - name: normal_id
-      type: string
-    - name: pair_id
-      type: string
-    - name: pair_maf
-      type: File
+  - name: FilloutIndexedSample
+    doc: A fillout sample that HAS a pre-filtered .maf file and HAS an indexed .bam file
+    type: record
+    fields:
+      - name: sample_id
+        type: string
+        doc: sample identifier must match the ID used inside the maf file
+      - name: normal_id
+        type: string
+      - name: sample_type
+        type: string
+        doc: should be one of "research" or "clinical"
+      - name: prefilter
+        type: boolean
+        doc: if the sample maf file needs to be pre-filtered (true) or not (false)
+      - name: maf_file
+        type: File
+        doc: the sample's input .maf file
+      - name: bam_file
+        type: File
+        doc: bam file with an accompanying .bai index file
+        secondaryFiles:
+          - .bai
 
-- name: TMBOutputPair
-  doc: a tumor normal pair for input to Tumor Mutation Burden analysis
-  type: record
-  fields:
-    - name: tumor_id
-      type: string
-    - name: normal_id
-      type: string
-    - name: pair_id
-      type: string
-    - name: tmb_maf
-      type: File
-      doc: filtered mutations used for TMB calculation
-    - name: tmb_tsv
-      type: File
-      doc: table with TMB values for the sample pair
+  - name: TNMafPileupPair
+    doc: a tumor normal sample pair with .maf and snp pileup files
+    type: record
+    fields:
+      - name: tumor_id
+        type: string
+      - name: normal_id
+        type: string
+      - name: pair_id
+        type: string
+      - name: snp_pileup
+        type: File
+      - name: pair_maf
+        type: File
 
-- name: MSIInputPair
-  doc: a tumor normal sample pair to be used for microsatellite instability analysis
-  type: record
-  fields:
-    - name: tumor_id
-      type: string
-    - name: normal_id
-      type: string
-    - name: pair_id
-      type: string
+  - name: TMBInputPair
+    doc: a tumor normal pair for input to Tumor Mutation Burden analysis
+    type: record
+    fields:
+      - name: tumor_id
+        type: string
+      - name: normal_id
+        type: string
+      - name: pair_id
+        type: string
+      - name: pair_maf
+        type: File
 
-- name: MSIOutputPair
-  doc: a tumor normal sample pair that has undergone microsatellite instability analysis
-  type: record
-  fields:
-    - name: tumor_id
-      type: string
-    - name: normal_id
-      type: string
-    - name: pair_id
-      type: string
-    - name: msi_tsv
-      doc: parsed output from msisensor
-      type: File
+  - name: TMBOutputPair
+    doc: a tumor normal pair that has undergone Tumor Mutation Burden analysis
+    type: record
+    fields:
+      - name: tumor_id
+        type: string
+      - name: normal_id
+        type: string
+      - name: pair_id
+        type: string
+      - name: tmb_maf
+        type: File
+        doc: filtered mutations used for TMB calculation
+      - name: tmb_tsv
+        type: File
+        doc: table with TMB values for the sample pair
 
-- name: FacetsPair
-  doc: a tumor normal sample pair used in Facets
-  type: record
-  fields:
-    - name: tumor_id
-      type: string
-      doc: ...
-    - name: normal_id
-      type: string
-    - name: pair_id
-      type: string
-    - name: purity_png
-      type: File
-      doc: ...
-    - name: purity_seg
-      type: File
-      doc: Tumor1.Normal1_purity.seg
-    - name: hisens_png
-      type: File
-      doc: ...
-    - name: hisens_seg
-      type: File
-      doc: Tumor1.Normal1_hisens.seg
-    - name: qc_txt
-      type: File
-      doc: Tumor1.Normal1.qc.txt
-    - name: gene_level_txt
-      type: File
-      doc: Tumor1.Normal1.gene_level.txt
-    - name: arm_level_txt
-      type: File
-      doc: Tumor2.Normal2.arm_level.txt
-    - name: facets_txt
-      type: File
-      doc: Tumor1.Normal1.txt
-    - name: purity_rds
-      type: File
-      doc: Tumor1.Normal1_purity.rds
-    - name: hisens_rds
-      type: File
-      doc: Tumor1.Normal1_hisens.rds
-    - name: annotated_maf
-      type: File
-      doc: Tumor1.Normal1_hisens.ccf.maf
-    - name: hisens_cncf_txt
-      type: File
-      doc: Tumor1.Normal1_hisens.cncf.txt (from legacy facets output)
+  - name: MSIInputPair
+    doc: a tumor normal sample pair to be used for microsatellite instability analysis
+    type: record
+    fields:
+      - name: tumor_id
+        type: string
+      - name: normal_id
+        type: string
+      - name: pair_id
+        type: string
 
+  - name: MSIOutputPair
+    doc: a tumor normal sample pair that has undergone microsatellite instability analysis
+    type: record
+    fields:
+      - name: tumor_id
+        type: string
+      - name: normal_id
+        type: string
+      - name: pair_id
+        type: string
+      - name: msi_tsv
+        doc: parsed output from msisensor
+        type: File
 
+  - name: FacetsPair
+    doc: a tumor normal sample pair used in Facets
+    type: record
+    fields:
+      - name: tumor_id
+        type: string
+        doc: ...
+      - name: normal_id
+        type: string
+      - name: pair_id
+        type: string
+      - name: purity_png
+        type: File?
+        doc: ...
+      - name: purity_seg
+        type: File?
+        doc: Tumor1.Normal1_purity.seg
+      - name: hisens_png
+        type: File?
+        doc: ...
+      - name: hisens_seg
+        type: File?
+        doc: Tumor1.Normal1_hisens.seg
+      - name: qc_txt
+        type: File?
+        doc: Tumor1.Normal1.qc.txt
+      - name: gene_level_txt
+        type: File?
+        doc: Tumor1.Normal1.gene_level.txt
+      - name: arm_level_txt
+        type: File?
+        doc: Tumor1.Normal1.arm_level.txt
+      - name: facets_txt
+        type: File?
+        doc: Tumor1.Normal1.txt
+      - name: purity_rds
+        type: File?
+        doc: Tumor1.Normal1_purity.rds
+      - name: hisens_rds
+        type: File?
+        doc: Tumor1.Normal1_hisens.rds
+      - name: annotated_maf
+        type: File?
+        doc: Tumor1.Normal1_hisens.ccf.maf
+      - name: hisens_cncf_txt
+        type: File?
+        doc: Tumor1.Normal1_hisens.cncf.txt (from legacy facets output)
 # TODO: get this working; right now only record types are supported here not File types
 # - name: RefFasta
 #   doc: reference genome .fasta file

From 035dd7803a5ee894e741643481b342003bd19201 Mon Sep 17 00:00:00 2001
From: Nikhil Kumar <nikhilkumar516@gmail.com>
Date: Tue, 11 Apr 2023 10:38:03 -0400
Subject: [PATCH 3/3] Filter out failed runs from post-facets processing

---
 cwl/workflow_with_facets.cwl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cwl/workflow_with_facets.cwl b/cwl/workflow_with_facets.cwl
index cd846d8..b4210fa 100644
--- a/cwl/workflow_with_facets.cwl
+++ b/cwl/workflow_with_facets.cwl
@@ -334,6 +334,11 @@ steps:
             hisens_segs.push(inputs.pairs[i].hisens_seg)
           };
 
+          annotated_mafs = annotated_mafs.filter(function(elem){return elem; });
+          facets_txts = facets_txts.filter(function(elem){return elem; });
+          hisens_cncf_txts = hisens_cncf_txts.filter(function(elem){return elem; });
+          hisens_segs = hisens_segs.filter(function(elem){return elem; });
+
           return {
             "annotated_mafs": annotated_mafs,
             "facets_txts": facets_txts,