From a82e2f139d09d853125cb0190d9b22e41446c425 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Tue, 25 Jan 2022 17:45:57 -0500
Subject: [PATCH 1/4] refactor samples fillout workflow to use array of sample
 records

---
 cwl/samples_fillout_workflow.cwl           | 74 ++++++++++++++++------
 tests/test_samples_fillout_workflow_cwl.py | 33 ++++++----
 2 files changed, 77 insertions(+), 30 deletions(-)

diff --git a/cwl/samples_fillout_workflow.cwl b/cwl/samples_fillout_workflow.cwl
index a4304c1..7793d8b 100644
--- a/cwl/samples_fillout_workflow.cwl
+++ b/cwl/samples_fillout_workflow.cwl
@@ -13,24 +13,24 @@ requirements:
 
 inputs:
   # NOTE: arrays for sample_ids, bam_files, maf_files must all be the same length and in the same order by sample
-  
+  samples:
+    type:
+      type: array
+      items:
+        type: record
+        fields:
+          maf_file: File
+          sample_id: string # must match sample ID used inside maf file
+          normal_id: string
   output_fname:
     type: [ 'null', string ]
     default: "output.maf"
-  sample_ids:
-    type:
-        type: array
-        items: string
   bam_files:
     type:
         type: array
         items: File
     secondaryFiles:
         - ^.bai
-  maf_files:
-    type:
-        type: array
-        items: File
   ref_fasta:
     type: File
     secondaryFiles:
@@ -54,14 +54,42 @@ inputs:
   #     path: /juno/work/ci/resources/vep/cache
 
 steps:
+  # create a list of just sample_ids out of the samples record array
+  create_samples_list:
+    in:
+      samples: samples
+    out: [ sample_ids ]
+    run:
+      class: ExpressionTool
+      inputs:
+        samples:
+          type:
+            type: array
+            items:
+              type: record
+              fields:
+                sample_id: string
+      outputs:
+        sample_ids: string[]
+      expression: "${
+        var sample_ids = [];
+        for ( var i in inputs.samples ){
+            sample_ids.push(inputs.samples[i]['sample_id']);
+          };
+        return {'sample_ids': sample_ids};
+        }"
+
+
   # convert all maf input files back to vcf because they are much easier to manipulate that way
   # NOTE: This is important; do NOT try to do these complex manipulations on maf format file
   maf2vcf:
-    scatter: [ sample_id, maf_file ]
-    scatterMethod: dotproduct
+    scatter: sample
     in:
-      sample_id: sample_ids
-      maf_file: maf_files
+      sample: samples
+      sample_id:
+        valueFrom: ${ return inputs.sample['sample_id']; }
+      maf_file:
+        valueFrom: ${ return inputs.sample['maf_file']; }
       ref_fasta: ref_fasta
     out:
       [ output_file ]
@@ -116,7 +144,7 @@ steps:
   # this will be used as the target regions for fillout
   merge_vcfs:
     in:
-      sample_ids: sample_ids
+      sample_ids: create_samples_list/sample_ids
       vcf_gz_files: maf2vcf/output_file
     out:
       [ merged_vcf, merged_vcf_gz ]
@@ -163,9 +191,11 @@ steps:
             - .tbi
 
   # run GetBaseCountsMultiSample on all the bam files against the target regions (the merged vcf from all samples)
+  # TODO: convert this to a `scatter` step that runs per-sample in parallel, then merge the outputs;
+  # otherwise we will hit command line argument length limits
   gbcms:
     in:
-      sample_ids: sample_ids
+      sample_ids: create_samples_list/sample_ids
       bam_files: bam_files
       targets_vcf: merge_vcfs/merged_vcf
       ref_fasta: ref_fasta
@@ -227,7 +257,6 @@ steps:
   # also we are going to add a column called SRC telling the source (which sample) each variant was originally found in
   fix_labels_and_merge_vcfs:
     in:
-      sample_ids: sample_ids
       fillout_vcf: gbcms/output_file
       merged_vcf: merge_vcfs/merged_vcf
       merged_vcf_gz: merge_vcfs/merged_vcf_gz
@@ -316,9 +345,13 @@ steps:
 
   # next we need to split apart the merged fillout vcf back into individual sample maf files
   split_vcf_to_mafs:
-    scatter: [ sample_id ]
+    scatter: sample
     in:
-      sample_id: sample_ids
+      sample: samples
+      sample_id:
+        valueFrom: ${ return inputs.sample['sample_id']; }
+      normal_id:
+        valueFrom: ${ return inputs.sample['normal_id']; }
       fillout_vcf: fix_labels_and_merge_vcfs/fillout_sources_vcf
       ref_fasta: ref_fasta
       exac_filter: exac_filter
@@ -338,6 +371,7 @@ steps:
                 set -eu
                 # convert the multi-sample annotated fillout vcf back into individual sample maf files
                 sample_id="${ return inputs.sample_id ; }"
+                normal_id="${ return inputs.normal_id ; }"
                 ref_fasta="${ return inputs.ref_fasta.path ; }"
                 input_vcf="${ return inputs.fillout_vcf.path ; }"
                 exac_filter="${ return inputs.exac_filter.path ; }"
@@ -364,9 +398,11 @@ steps:
                 --retain-fmt GT,FL_AD,FL_ADN,FL_ADP,FL_DP,FL_DPN,FL_DPP,FL_RD,FL_RDN,FL_RDP,FL_VF,AD,DP \\
                 --vep-forks 8 \\
                 --vcf-tumor-id "\${sample_id}" \\
-                --tumor-id "\${sample_id}"
+                --tumor-id "\${sample_id}" \\
+                --normal-id "\${normal_id}"
       inputs:
         sample_id: string
+        normal_id: string
         ref_fasta:
           type: File
           secondaryFiles:
diff --git a/tests/test_samples_fillout_workflow_cwl.py b/tests/test_samples_fillout_workflow_cwl.py
index 467ab3e..8475f0a 100644
--- a/tests/test_samples_fillout_workflow_cwl.py
+++ b/tests/test_samples_fillout_workflow_cwl.py
@@ -117,17 +117,28 @@ def test_run_fillout_workflow(self):
         Test case for running the fillout workflow on a number of samples, each with a bam and maf
         """
         self.maxDiff = None
+        # self.runner_args['debug'] = True
+        # self.runner_args['js_console'] = True
+        # self.preserve = True
+        # print(self.tmpdir)
 
         self.input = {
+            "samples": [
+                {
+                    "sample_id": "Sample24",
+                    "normal_id": "Sample24-N",
+                    "maf_file": { "class": "File", "path": self.maf1 }
+                },
+                {
+                    "sample_id": "Sample23",
+                    "normal_id": "Sample23-N",
+                    "maf_file": { "class": "File", "path": self.maf2 }
+                },
+            ],
             "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']},
-            "sample_ids": ["Sample24", "Sample23"],
             "bam_files": [
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'], "Sample24.rg.md.abra.printreads.bam") },
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_08390_G']['BAM_DIR'], "Sample23.rg.md.abra.printreads.bam") }
-            ],
-            "maf_files": [
-                { "class": "File", "path": self.maf1 },
-                { "class": "File", "path": self.maf2 }
             ]
         }
 
@@ -138,8 +149,8 @@ def test_run_fillout_workflow(self):
                 'location': 'file://' + os.path.join(output_dir,'output.maf'),
                 'basename': 'output.maf',
                 'class': 'File',
-                'checksum': 'sha1$45719b33d5de77f789b43a2f66f1bf0a5d039cbd',
-                'size': 7992,
+                'checksum': 'sha1$7932ae9938a5686f6328f143a6c82308877cb822',
+                'size': 8008,
                 'path':  os.path.join(output_dir,'output.maf')
                 }
             }
@@ -154,13 +165,13 @@ def test_run_fillout_workflow(self):
         self.assertTrue(len(records) == 4)
 
         expected_records = [
-        {'Hugo_Symbol': 'KMT2C', 'Entrez_Gene_Id': '58508', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '7', 'Start_Position': '151845367', 'End_Position': '151845367', 'Strand': '+', 'Variant_Classification': 'Missense_Mutation', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': '', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample24', 'Matched_Norm_Sample_Barcode': 'NORMAL', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.13645C>T', 'HGVSp': 'p.Arg4549Cys', 'HGVSp_Short': 'p.R4549C', 'Transcript_ID': 'ENST00000262189', 'Exon_Number': '52/59', 't_depth': '72', 't_ref_count': '68', 't_alt_count': '4', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'KMT2C,missense_variant,p.Arg4606Cys,ENST00000355193,;KMT2C,missense_variant,p.Arg4549Cys,ENST00000262189,NM_170606.2;KMT2C,missense_variant,p.Arg2110Cys,ENST00000360104,;KMT2C,missense_variant,p.Arg1166Cys,ENST00000424877,;KMT2C,downstream_gene_variant,,ENST00000418061,;KMT2C,downstream_gene_variant,,ENST00000485241,;KMT2C,3_prime_UTR_variant,,ENST00000558084,;KMT2C,non_coding_transcript_exon_variant,,ENST00000473186,;', 'Allele': 'A', 'Gene': 'ENSG00000055609', 'Feature': 'ENST00000262189', 'Feature_type': 'Transcript', 'Consequence': 'missense_variant', 'cDNA_position': '13864/16862', 'CDS_position': '13645/14736', 'Protein_position': '4549/4911', 'Amino_acids': 'R/C', 'Codons': 'Cgc/Tgc', 'Existing_variation': 'COSM245709,COSM245710', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '-1', 'SYMBOL': 'KMT2C', 
'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '13726', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS5931.1', 'ENSP': 'ENSP00000262189', 'SWISSPROT': 'Q8NEZ4', 'TREMBL': 'Q6N019,Q75MN6,H0YMU7', 'UNIPARC': 'UPI0000141B9F', 'RefSeq': 'NM_170606.2', 'SIFT': '', 'PolyPhen': 'probably_damaging(0.999)', 'EXON': '52/59', 'INTRON': '', 'DOMAINS': 'PROSITE_profiles:PS51542,hmmpanther:PTHR22884,hmmpanther:PTHR22884:SF305', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '1,1', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'MODERATE', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '1,1', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '', 'FILTER': '.', 'flanking_bps': 'CGA', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '151845367', 'AC': '1', 'AN': '2', 'SRC': 'Sample23,', 't_GT': './.', 'n_GT': '', 't_FL_AD': '4', 'n_FL_AD': '', 't_FL_ADN': '2', 'n_FL_ADN': '', 't_FL_ADP': '2', 'n_FL_ADP': '', 't_FL_DP': '72', 'n_FL_DP': '', 't_FL_DPN': '38', 'n_FL_DPN': '', 't_FL_DPP': '34', 'n_FL_DPP': '', 't_FL_RD': '68', 'n_FL_RD': '', 't_FL_RDN': '36', 'n_FL_RDN': '', 't_FL_RDP': '32', 'n_FL_RDP': '', 't_FL_VF': '0.0555556', 'n_FL_VF': '', 't_AD': '', 'n_AD': '', 't_DP': '.', 'n_DP': '', 't_depth_sample': '', 't_ref_count_sample': '', 't_alt_count_sample': '', 
'is_fillout': 'True'},
+        {'Hugo_Symbol': 'KMT2C', 'Entrez_Gene_Id': '58508', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '7', 'Start_Position': '151845367', 'End_Position': '151845367', 'Strand': '+', 'Variant_Classification': 'Missense_Mutation', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': '', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample24', 'Matched_Norm_Sample_Barcode': 'Sample24-N', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.13645C>T', 'HGVSp': 'p.Arg4549Cys', 'HGVSp_Short': 'p.R4549C', 'Transcript_ID': 'ENST00000262189', 'Exon_Number': '52/59', 't_depth': '72', 't_ref_count': '68', 't_alt_count': '4', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'KMT2C,missense_variant,p.Arg4606Cys,ENST00000355193,;KMT2C,missense_variant,p.Arg4549Cys,ENST00000262189,NM_170606.2;KMT2C,missense_variant,p.Arg2110Cys,ENST00000360104,;KMT2C,missense_variant,p.Arg1166Cys,ENST00000424877,;KMT2C,downstream_gene_variant,,ENST00000418061,;KMT2C,downstream_gene_variant,,ENST00000485241,;KMT2C,3_prime_UTR_variant,,ENST00000558084,;KMT2C,non_coding_transcript_exon_variant,,ENST00000473186,;', 'Allele': 'A', 'Gene': 'ENSG00000055609', 'Feature': 'ENST00000262189', 'Feature_type': 'Transcript', 'Consequence': 'missense_variant', 'cDNA_position': '13864/16862', 'CDS_position': '13645/14736', 'Protein_position': '4549/4911', 'Amino_acids': 'R/C', 'Codons': 'Cgc/Tgc', 'Existing_variation': 'COSM245709,COSM245710', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '-1', 'SYMBOL': 'KMT2C', 
'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '13726', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS5931.1', 'ENSP': 'ENSP00000262189', 'SWISSPROT': 'Q8NEZ4', 'TREMBL': 'Q6N019,Q75MN6,H0YMU7', 'UNIPARC': 'UPI0000141B9F', 'RefSeq': 'NM_170606.2', 'SIFT': '', 'PolyPhen': 'probably_damaging(0.999)', 'EXON': '52/59', 'INTRON': '', 'DOMAINS': 'PROSITE_profiles:PS51542,hmmpanther:PTHR22884,hmmpanther:PTHR22884:SF305', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '1,1', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'MODERATE', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '1,1', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '', 'FILTER': '.', 'flanking_bps': 'CGA', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '151845367', 'AC': '1', 'AN': '2', 'SRC': 'Sample23,', 't_GT': './.', 'n_GT': '', 't_FL_AD': '4', 'n_FL_AD': '', 't_FL_ADN': '2', 'n_FL_ADN': '', 't_FL_ADP': '2', 'n_FL_ADP': '', 't_FL_DP': '72', 'n_FL_DP': '', 't_FL_DPN': '38', 'n_FL_DPN': '', 't_FL_DPP': '34', 'n_FL_DPP': '', 't_FL_RD': '68', 'n_FL_RD': '', 't_FL_RDN': '36', 'n_FL_RDN': '', 't_FL_RDP': '32', 'n_FL_RDP': '', 't_FL_VF': '0.0555556', 'n_FL_VF': '', 't_AD': '', 'n_AD': '', 't_DP': '.', 'n_DP': '', 't_depth_sample': '', 't_ref_count_sample': '', 't_alt_count_sample': '', 
'is_fillout': 'True'},
 
-        {'Hugo_Symbol': 'RTEL1', 'Entrez_Gene_Id': '51750', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '20', 'Start_Position': '62321135', 'End_Position': '62321135', 'Strand': '+', 'Variant_Classification': 'Silent', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': 'rs746824222', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample24', 'Matched_Norm_Sample_Barcode': 'NORMAL', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.2130G>A', 'HGVSp': 'p.=', 'HGVSp_Short': 'p.Q710=', 'Transcript_ID': 'ENST00000508582', 'Exon_Number': '24/35', 't_depth': '653', 't_ref_count': '511', 't_alt_count': '142', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'RTEL1,synonymous_variant,p.=,ENST00000318100,;RTEL1,synonymous_variant,p.=,ENST00000370018,NM_032957.4,NM_016434.3;RTEL1,synonymous_variant,p.=,ENST00000360203,NM_001283009.1;RTEL1,synonymous_variant,p.=,ENST00000508582,;RTEL1,synonymous_variant,p.=,ENST00000425905,;RTEL1,upstream_gene_variant,,ENST00000370003,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000482936,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000492259,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000480273,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000496281,;RTEL1,upstream_gene_variant,,ENST00000496816,;', 'Allele': 'A', 'Gene': 'ENSG00000258366', 'Feature': 'ENST00000508582', 'Feature_type': 'Transcript', 'Consequence': 'synonymous_variant', 'cDNA_position': '2476/4273', 'CDS_position': '2130/3732', 'Protein_position': '710/1243', 
'Amino_acids': 'Q', 'Codons': 'caG/caA', 'Existing_variation': 'rs746824222', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '1', 'SYMBOL': 'RTEL1', 'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '15888', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS13530.3', 'ENSP': 'ENSP00000424307', 'SWISSPROT': 'Q9NZ71', 'TREMBL': '', 'UNIPARC': 'UPI00019B2219', 'RefSeq': '', 'SIFT': '', 'PolyPhen': '', 'EXON': '24/35', 'INTRON': '', 'DOMAINS': 'Superfamily_domains:SSF52540,SMART_domains:SM00491,Pfam_domain:PF13307,TIGRFAM_domain:TIGR00604,hmmpanther:PTHR11472:SF4,hmmpanther:PTHR11472', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'LOW', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '1', 'FILTER': '.', 'flanking_bps': 'AGG', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '62321135', 'AC': '1', 'AN': '2', 'SRC': 'Sample24,', 't_GT': '0/1', 'n_GT': '', 't_FL_AD': '0', 'n_FL_AD': '', 't_FL_ADN': '0', 'n_FL_ADN': '', 't_FL_ADP': '0', 'n_FL_ADP': '', 't_FL_DP': '129', 'n_FL_DP': '', 't_FL_DPN': '63', 'n_FL_DPN': '', 't_FL_DPP': '66', 'n_FL_DPP': '', 't_FL_RD': '129', 'n_FL_RD': '', 't_FL_RDN': '63', 'n_FL_RDN': '', 't_FL_RDP': '66', 'n_FL_RDP': '', 
't_FL_VF': '0', 'n_FL_VF': '', 't_AD': '511,142', 'n_AD': '', 't_DP': '653', 'n_DP': '', 't_depth_sample': '653', 't_ref_count_sample': '511', 't_alt_count_sample': '142', 'is_fillout': 'False'},
+        {'Hugo_Symbol': 'RTEL1', 'Entrez_Gene_Id': '51750', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '20', 'Start_Position': '62321135', 'End_Position': '62321135', 'Strand': '+', 'Variant_Classification': 'Silent', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': 'rs746824222', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample24', 'Matched_Norm_Sample_Barcode': 'Sample24-N', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.2130G>A', 'HGVSp': 'p.=', 'HGVSp_Short': 'p.Q710=', 'Transcript_ID': 'ENST00000508582', 'Exon_Number': '24/35', 't_depth': '653', 't_ref_count': '511', 't_alt_count': '142', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'RTEL1,synonymous_variant,p.=,ENST00000318100,;RTEL1,synonymous_variant,p.=,ENST00000370018,NM_032957.4,NM_016434.3;RTEL1,synonymous_variant,p.=,ENST00000360203,NM_001283009.1;RTEL1,synonymous_variant,p.=,ENST00000508582,;RTEL1,synonymous_variant,p.=,ENST00000425905,;RTEL1,upstream_gene_variant,,ENST00000370003,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000482936,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000492259,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000480273,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000496281,;RTEL1,upstream_gene_variant,,ENST00000496816,;', 'Allele': 'A', 'Gene': 'ENSG00000258366', 'Feature': 'ENST00000508582', 'Feature_type': 'Transcript', 'Consequence': 'synonymous_variant', 'cDNA_position': '2476/4273', 'CDS_position': '2130/3732', 'Protein_position': 
'710/1243', 'Amino_acids': 'Q', 'Codons': 'caG/caA', 'Existing_variation': 'rs746824222', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '1', 'SYMBOL': 'RTEL1', 'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '15888', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS13530.3', 'ENSP': 'ENSP00000424307', 'SWISSPROT': 'Q9NZ71', 'TREMBL': '', 'UNIPARC': 'UPI00019B2219', 'RefSeq': '', 'SIFT': '', 'PolyPhen': '', 'EXON': '24/35', 'INTRON': '', 'DOMAINS': 'Superfamily_domains:SSF52540,SMART_domains:SM00491,Pfam_domain:PF13307,TIGRFAM_domain:TIGR00604,hmmpanther:PTHR11472:SF4,hmmpanther:PTHR11472', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'LOW', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '1', 'FILTER': '.', 'flanking_bps': 'AGG', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '62321135', 'AC': '1', 'AN': '2', 'SRC': 'Sample24,', 't_GT': '0/1', 'n_GT': '', 't_FL_AD': '0', 'n_FL_AD': '', 't_FL_ADN': '0', 'n_FL_ADN': '', 't_FL_ADP': '0', 'n_FL_ADP': '', 't_FL_DP': '129', 'n_FL_DP': '', 't_FL_DPN': '63', 'n_FL_DPN': '', 't_FL_DPP': '66', 'n_FL_DPP': '', 't_FL_RD': '129', 'n_FL_RD': '', 't_FL_RDN': '63', 'n_FL_RDN': '', 't_FL_RDP': '66', 
'n_FL_RDP': '', 't_FL_VF': '0', 'n_FL_VF': '', 't_AD': '511,142', 'n_AD': '', 't_DP': '653', 'n_DP': '', 't_depth_sample': '653', 't_ref_count_sample': '511', 't_alt_count_sample': '142', 'is_fillout': 'False'},
 
-        {'Hugo_Symbol': 'KMT2C', 'Entrez_Gene_Id': '58508', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '7', 'Start_Position': '151845367', 'End_Position': '151845367', 'Strand': '+', 'Variant_Classification': 'Missense_Mutation', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': '', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample23', 'Matched_Norm_Sample_Barcode': 'NORMAL', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.13645C>T', 'HGVSp': 'p.Arg4549Cys', 'HGVSp_Short': 'p.R4549C', 'Transcript_ID': 'ENST00000262189', 'Exon_Number': '52/59', 't_depth': '653', 't_ref_count': '511', 't_alt_count': '142', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'KMT2C,missense_variant,p.Arg4606Cys,ENST00000355193,;KMT2C,missense_variant,p.Arg4549Cys,ENST00000262189,NM_170606.2;KMT2C,missense_variant,p.Arg2110Cys,ENST00000360104,;KMT2C,missense_variant,p.Arg1166Cys,ENST00000424877,;KMT2C,downstream_gene_variant,,ENST00000418061,;KMT2C,downstream_gene_variant,,ENST00000485241,;KMT2C,3_prime_UTR_variant,,ENST00000558084,;KMT2C,non_coding_transcript_exon_variant,,ENST00000473186,;', 'Allele': 'A', 'Gene': 'ENSG00000055609', 'Feature': 'ENST00000262189', 'Feature_type': 'Transcript', 'Consequence': 'missense_variant', 'cDNA_position': '13864/16862', 'CDS_position': '13645/14736', 'Protein_position': '4549/4911', 'Amino_acids': 'R/C', 'Codons': 'Cgc/Tgc', 'Existing_variation': 'COSM245709,COSM245710', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '-1', 'SYMBOL': 'KMT2C', 
'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '13726', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS5931.1', 'ENSP': 'ENSP00000262189', 'SWISSPROT': 'Q8NEZ4', 'TREMBL': 'Q6N019,Q75MN6,H0YMU7', 'UNIPARC': 'UPI0000141B9F', 'RefSeq': 'NM_170606.2', 'SIFT': '', 'PolyPhen': 'probably_damaging(0.999)', 'EXON': '52/59', 'INTRON': '', 'DOMAINS': 'PROSITE_profiles:PS51542,hmmpanther:PTHR22884,hmmpanther:PTHR22884:SF305', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '1,1', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'MODERATE', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '1,1', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '', 'FILTER': '.', 'flanking_bps': 'CGA', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '151845367', 'AC': '1', 'AN': '2', 'SRC': 'Sample23,', 't_GT': '0/1', 'n_GT': '', 't_FL_AD': '0', 'n_FL_AD': '', 't_FL_ADN': '0', 'n_FL_ADN': '', 't_FL_ADP': '0', 'n_FL_ADP': '', 't_FL_DP': '91', 'n_FL_DP': '', 't_FL_DPN': '48', 'n_FL_DPN': '', 't_FL_DPP': '43', 'n_FL_DPP': '', 't_FL_RD': '91', 'n_FL_RD': '', 't_FL_RDN': '48', 'n_FL_RDN': '', 't_FL_RDP': '43', 'n_FL_RDP': '', 't_FL_VF': '0', 'n_FL_VF': '', 't_AD': '511,142', 'n_AD': '', 't_DP': '653', 'n_DP': '', 't_depth_sample': '653', 't_ref_count_sample': '511', 
't_alt_count_sample': '142', 'is_fillout': 'False'},
+        {'Hugo_Symbol': 'KMT2C', 'Entrez_Gene_Id': '58508', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '7', 'Start_Position': '151845367', 'End_Position': '151845367', 'Strand': '+', 'Variant_Classification': 'Missense_Mutation', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': '', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample23', 'Matched_Norm_Sample_Barcode': 'Sample23-N', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.13645C>T', 'HGVSp': 'p.Arg4549Cys', 'HGVSp_Short': 'p.R4549C', 'Transcript_ID': 'ENST00000262189', 'Exon_Number': '52/59', 't_depth': '653', 't_ref_count': '511', 't_alt_count': '142', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'KMT2C,missense_variant,p.Arg4606Cys,ENST00000355193,;KMT2C,missense_variant,p.Arg4549Cys,ENST00000262189,NM_170606.2;KMT2C,missense_variant,p.Arg2110Cys,ENST00000360104,;KMT2C,missense_variant,p.Arg1166Cys,ENST00000424877,;KMT2C,downstream_gene_variant,,ENST00000418061,;KMT2C,downstream_gene_variant,,ENST00000485241,;KMT2C,3_prime_UTR_variant,,ENST00000558084,;KMT2C,non_coding_transcript_exon_variant,,ENST00000473186,;', 'Allele': 'A', 'Gene': 'ENSG00000055609', 'Feature': 'ENST00000262189', 'Feature_type': 'Transcript', 'Consequence': 'missense_variant', 'cDNA_position': '13864/16862', 'CDS_position': '13645/14736', 'Protein_position': '4549/4911', 'Amino_acids': 'R/C', 'Codons': 'Cgc/Tgc', 'Existing_variation': 'COSM245709,COSM245710', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '-1', 'SYMBOL': 'KMT2C', 
'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '13726', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS5931.1', 'ENSP': 'ENSP00000262189', 'SWISSPROT': 'Q8NEZ4', 'TREMBL': 'Q6N019,Q75MN6,H0YMU7', 'UNIPARC': 'UPI0000141B9F', 'RefSeq': 'NM_170606.2', 'SIFT': '', 'PolyPhen': 'probably_damaging(0.999)', 'EXON': '52/59', 'INTRON': '', 'DOMAINS': 'PROSITE_profiles:PS51542,hmmpanther:PTHR22884,hmmpanther:PTHR22884:SF305', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '1,1', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'MODERATE', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '1,1', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '', 'FILTER': '.', 'flanking_bps': 'CGA', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '151845367', 'AC': '1', 'AN': '2', 'SRC': 'Sample23,', 't_GT': '0/1', 'n_GT': '', 't_FL_AD': '0', 'n_FL_AD': '', 't_FL_ADN': '0', 'n_FL_ADN': '', 't_FL_ADP': '0', 'n_FL_ADP': '', 't_FL_DP': '91', 'n_FL_DP': '', 't_FL_DPN': '48', 'n_FL_DPN': '', 't_FL_DPP': '43', 'n_FL_DPP': '', 't_FL_RD': '91', 'n_FL_RD': '', 't_FL_RDN': '48', 'n_FL_RDN': '', 't_FL_RDP': '43', 'n_FL_RDP': '', 't_FL_VF': '0', 'n_FL_VF': '', 't_AD': '511,142', 'n_AD': '', 't_DP': '653', 'n_DP': '', 't_depth_sample': '653', 't_ref_count_sample': '511', 
't_alt_count_sample': '142', 'is_fillout': 'False'},
 
-        {'Hugo_Symbol': 'RTEL1', 'Entrez_Gene_Id': '51750', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '20', 'Start_Position': '62321135', 'End_Position': '62321135', 'Strand': '+', 'Variant_Classification': 'Silent', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': 'rs746824222', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample23', 'Matched_Norm_Sample_Barcode': 'NORMAL', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.2130G>A', 'HGVSp': 'p.=', 'HGVSp_Short': 'p.Q710=', 'Transcript_ID': 'ENST00000508582', 'Exon_Number': '24/35', 't_depth': '184', 't_ref_count': '184', 't_alt_count': '0', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'RTEL1,synonymous_variant,p.=,ENST00000318100,;RTEL1,synonymous_variant,p.=,ENST00000370018,NM_032957.4,NM_016434.3;RTEL1,synonymous_variant,p.=,ENST00000360203,NM_001283009.1;RTEL1,synonymous_variant,p.=,ENST00000508582,;RTEL1,synonymous_variant,p.=,ENST00000425905,;RTEL1,upstream_gene_variant,,ENST00000370003,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000482936,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000492259,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000480273,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000496281,;RTEL1,upstream_gene_variant,,ENST00000496816,;', 'Allele': 'A', 'Gene': 'ENSG00000258366', 'Feature': 'ENST00000508582', 'Feature_type': 'Transcript', 'Consequence': 'synonymous_variant', 'cDNA_position': '2476/4273', 'CDS_position': '2130/3732', 'Protein_position': '710/1243', 
'Amino_acids': 'Q', 'Codons': 'caG/caA', 'Existing_variation': 'rs746824222', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '1', 'SYMBOL': 'RTEL1', 'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '15888', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS13530.3', 'ENSP': 'ENSP00000424307', 'SWISSPROT': 'Q9NZ71', 'TREMBL': '', 'UNIPARC': 'UPI00019B2219', 'RefSeq': '', 'SIFT': '', 'PolyPhen': '', 'EXON': '24/35', 'INTRON': '', 'DOMAINS': 'Superfamily_domains:SSF52540,SMART_domains:SM00491,Pfam_domain:PF13307,TIGRFAM_domain:TIGR00604,hmmpanther:PTHR11472:SF4,hmmpanther:PTHR11472', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'LOW', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '1', 'FILTER': '.', 'flanking_bps': 'AGG', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '62321135', 'AC': '1', 'AN': '2', 'SRC': 'Sample24,', 't_GT': './.', 'n_GT': '', 't_FL_AD': '0', 'n_FL_AD': '', 't_FL_ADN': '0', 'n_FL_ADN': '', 't_FL_ADP': '0', 'n_FL_ADP': '', 't_FL_DP': '184', 'n_FL_DP': '', 't_FL_DPN': '95', 'n_FL_DPN': '', 't_FL_DPP': '89', 'n_FL_DPP': '', 't_FL_RD': '184', 'n_FL_RD': '', 't_FL_RDN': '95', 'n_FL_RDN': '', 't_FL_RDP': '89', 'n_FL_RDP': '', 
't_FL_VF': '0', 'n_FL_VF': '', 't_AD': '', 'n_AD': '', 't_DP': '.', 'n_DP': '', 't_depth_sample': '', 't_ref_count_sample': '', 't_alt_count_sample': '', 'is_fillout': 'True'}
+        {'Hugo_Symbol': 'RTEL1', 'Entrez_Gene_Id': '51750', 'Center': '.', 'NCBI_Build': 'GRCh37', 'Chromosome': '20', 'Start_Position': '62321135', 'End_Position': '62321135', 'Strand': '+', 'Variant_Classification': 'Silent', 'Variant_Type': 'SNP', 'Reference_Allele': 'G', 'Tumor_Seq_Allele1': 'G', 'Tumor_Seq_Allele2': 'A', 'dbSNP_RS': 'rs746824222', 'dbSNP_Val_Status': '', 'Tumor_Sample_Barcode': 'Sample23', 'Matched_Norm_Sample_Barcode': 'Sample23-N', 'Match_Norm_Seq_Allele1': 'G', 'Match_Norm_Seq_Allele2': 'G', 'Tumor_Validation_Allele1': '', 'Tumor_Validation_Allele2': '', 'Match_Norm_Validation_Allele1': '', 'Match_Norm_Validation_Allele2': '', 'Verification_Status': '', 'Validation_Status': '', 'Mutation_Status': '', 'Sequencing_Phase': '', 'Sequence_Source': '', 'Validation_Method': '', 'Score': '', 'BAM_File': '', 'Sequencer': '', 'Tumor_Sample_UUID': '', 'Matched_Norm_Sample_UUID': '', 'HGVSc': 'c.2130G>A', 'HGVSp': 'p.=', 'HGVSp_Short': 'p.Q710=', 'Transcript_ID': 'ENST00000508582', 'Exon_Number': '24/35', 't_depth': '184', 't_ref_count': '184', 't_alt_count': '0', 'n_depth': '', 'n_ref_count': '', 'n_alt_count': '', 'all_effects': 'RTEL1,synonymous_variant,p.=,ENST00000318100,;RTEL1,synonymous_variant,p.=,ENST00000370018,NM_032957.4,NM_016434.3;RTEL1,synonymous_variant,p.=,ENST00000360203,NM_001283009.1;RTEL1,synonymous_variant,p.=,ENST00000508582,;RTEL1,synonymous_variant,p.=,ENST00000425905,;RTEL1,upstream_gene_variant,,ENST00000370003,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000482936,;RTEL1-TNFRSF6B,synonymous_variant,p.=,ENST00000492259,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000480273,;RTEL1-TNFRSF6B,non_coding_transcript_exon_variant,,ENST00000496281,;RTEL1,upstream_gene_variant,,ENST00000496816,;', 'Allele': 'A', 'Gene': 'ENSG00000258366', 'Feature': 'ENST00000508582', 'Feature_type': 'Transcript', 'Consequence': 'synonymous_variant', 'cDNA_position': '2476/4273', 'CDS_position': '2130/3732', 'Protein_position': 
'710/1243', 'Amino_acids': 'Q', 'Codons': 'caG/caA', 'Existing_variation': 'rs746824222', 'ALLELE_NUM': '1', 'DISTANCE': '', 'STRAND_VEP': '1', 'SYMBOL': 'RTEL1', 'SYMBOL_SOURCE': 'HGNC', 'HGNC_ID': '15888', 'BIOTYPE': 'protein_coding', 'CANONICAL': 'YES', 'CCDS': 'CCDS13530.3', 'ENSP': 'ENSP00000424307', 'SWISSPROT': 'Q9NZ71', 'TREMBL': '', 'UNIPARC': 'UPI00019B2219', 'RefSeq': '', 'SIFT': '', 'PolyPhen': '', 'EXON': '24/35', 'INTRON': '', 'DOMAINS': 'Superfamily_domains:SSF52540,SMART_domains:SM00491,Pfam_domain:PF13307,TIGRFAM_domain:TIGR00604,hmmpanther:PTHR11472:SF4,hmmpanther:PTHR11472', 'AF': '', 'AFR_AF': '', 'AMR_AF': '', 'ASN_AF': '', 'EAS_AF': '', 'EUR_AF': '', 'SAS_AF': '', 'AA_AF': '', 'EA_AF': '', 'CLIN_SIG': '', 'SOMATIC': '', 'PUBMED': '', 'MOTIF_NAME': '', 'MOTIF_POS': '', 'HIGH_INF_POS': '', 'MOTIF_SCORE_CHANGE': '', 'IMPACT': 'LOW', 'PICK': '1', 'VARIANT_CLASS': 'SNV', 'TSL': '', 'HGVS_OFFSET': '', 'PHENO': '', 'MINIMISED': '', 'ExAC_AF': '', 'ExAC_AF_AFR': '', 'ExAC_AF_AMR': '', 'ExAC_AF_EAS': '', 'ExAC_AF_FIN': '', 'ExAC_AF_NFE': '', 'ExAC_AF_OTH': '', 'ExAC_AF_SAS': '', 'GENE_PHENO': '1', 'FILTER': '.', 'flanking_bps': 'AGG', 'vcf_id': '.', 'vcf_qual': '.', 'ExAC_AF_Adj': '', 'ExAC_AC_AN_Adj': '', 'ExAC_AC_AN': '', 'ExAC_AC_AN_AFR': '', 'ExAC_AC_AN_AMR': '', 'ExAC_AC_AN_EAS': '', 'ExAC_AC_AN_FIN': '', 'ExAC_AC_AN_NFE': '', 'ExAC_AC_AN_OTH': '', 'ExAC_AC_AN_SAS': '', 'ExAC_FILTER': '', 'gnomAD_AF': '', 'gnomAD_AFR_AF': '', 'gnomAD_AMR_AF': '', 'gnomAD_ASJ_AF': '', 'gnomAD_EAS_AF': '', 'gnomAD_FIN_AF': '', 'gnomAD_NFE_AF': '', 'gnomAD_OTH_AF': '', 'gnomAD_SAS_AF': '', 'vcf_pos': '62321135', 'AC': '1', 'AN': '2', 'SRC': 'Sample24,', 't_GT': './.', 'n_GT': '', 't_FL_AD': '0', 'n_FL_AD': '', 't_FL_ADN': '0', 'n_FL_ADN': '', 't_FL_ADP': '0', 'n_FL_ADP': '', 't_FL_DP': '184', 'n_FL_DP': '', 't_FL_DPN': '95', 'n_FL_DPN': '', 't_FL_DPP': '89', 'n_FL_DPP': '', 't_FL_RD': '184', 'n_FL_RD': '', 't_FL_RDN': '95', 'n_FL_RDN': '', 't_FL_RDP': '89', 
'n_FL_RDP': '', 't_FL_VF': '0', 'n_FL_VF': '', 't_AD': '', 'n_AD': '', 't_DP': '.', 'n_DP': '', 't_depth_sample': '', 't_ref_count_sample': '', 't_alt_count_sample': '', 'is_fillout': 'True'}
         ]
 
         self.assertEqual(records, expected_records)

From 11466570dbdd5409c5f39faa428c0b64260d8830 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Tue, 25 Jan 2022 18:40:27 -0500
Subject: [PATCH 2/4] update submodule

---
 pluto | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pluto b/pluto
index 7372d40..f2e6c56 160000
--- a/pluto
+++ b/pluto
@@ -1 +1 @@
-Subproject commit 7372d40fc2393dfad168faea149ef23326b3de1a
+Subproject commit f2e6c5603db5ae4d7c1b13ea09ac7bd220f0c778

From 4b296760dcf26b1ce2e196d22329c5cdafd7f708 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Tue, 25 Jan 2022 20:19:29 -0500
Subject: [PATCH 3/4] update test cases for samples fillout workflow

---
 tests/test_samples_fillout_workflow_cwl.py | 80 ++++++++++++++--------
 1 file changed, 50 insertions(+), 30 deletions(-)

diff --git a/tests/test_samples_fillout_workflow_cwl.py b/tests/test_samples_fillout_workflow_cwl.py
index 8475f0a..3edbd1d 100644
--- a/tests/test_samples_fillout_workflow_cwl.py
+++ b/tests/test_samples_fillout_workflow_cwl.py
@@ -143,21 +143,21 @@ def test_run_fillout_workflow(self):
         }
 
         output_json, output_dir = self.run_cwl()
+        output_path = os.path.join(output_dir,'output.maf')
 
         expected_output = {
             'output_file': {
-                'location': 'file://' + os.path.join(output_dir,'output.maf'),
+                'location': 'file://' + output_path,
                 'basename': 'output.maf',
                 'class': 'File',
                 'checksum': 'sha1$7932ae9938a5686f6328f143a6c82308877cb822',
                 'size': 8008,
-                'path':  os.path.join(output_dir,'output.maf')
+                'path':  output_path
                 }
             }
-        self.assertEqual(output_json, expected_output)
+        self.assertCWLDictEqual(output_json, expected_output)
 
-        output_file = output_json['output_file']['path']
-        reader = TableReader(output_file)
+        reader = TableReader(output_path)
         comments = reader.comment_lines
         fieldnames = reader.get_fieldnames()
         records = [ rec for rec in reader.read() ]
@@ -188,40 +188,47 @@ def test_run_fillout_workflow2(self):
         This test uses full samples
         """
         self.maxDiff = None
-
+        maf1 = os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample1.Sample2.muts.maf")
+        maf24 = os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample24.Sample23.muts.maf")
         self.input = {
+            "samples": [
+                {
+                    "sample_id": "Sample1",
+                    "normal_id": "Sample1-N",
+                    "maf_file": { "class": "File", "path": maf1 }
+                },
+                {
+                    "sample_id": "Sample24",
+                    "normal_id": "Sample24-N",
+                    "maf_file": { "class": "File", "path": maf24 }
+                },
+            ],
             "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_1']['REF_FASTA']},
-            "sample_ids": ["Sample1", "Sample24"],
             "bam_files": [
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['BAM_DIR'], "Sample1.bam") },
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['BAM_DIR'], "Sample24.bam") }
-            ],
-            "maf_files": [
-                { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample1.Sample2.muts.maf") },
-                { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample24.Sample23.muts.maf") }
             ]
         }
 
         output_json, output_dir = self.run_cwl()
-
+        output_path = os.path.join(output_dir,'output.maf')
         expected_output = {
             'output_file': {
-                'location': 'file://' + os.path.join(output_dir,'output.maf'),
+                'location': 'file://' + output_path,
                 'basename': 'output.maf',
                 'class': 'File',
                 # 'checksum': 'sha1$be8534bcaf326de029790a832ab5b44a17a03d22',
                 # 'size': 40194610,
-                'path':  os.path.join(output_dir,'output.maf')
+                'path':  output_path
                 }
             }
         # NOTE: for some reason, this file keeps coming out with different annotations for 'splice_acceptor_variant' or `splice_donor_variant`
         # this keeps changing the byte size and checksum so need to remove those here for now
         output_json['output_file'].pop('checksum')
         output_json['output_file'].pop('size')
-        self.assertEqual(output_json, expected_output)
+        self.assertCWLDictEqual(output_json, expected_output)
 
-        output_file = output_json['output_file']['path']
-        comments, mutations = self.load_mutations(output_file)
+        comments, mutations = self.load_mutations(output_path)
 
         self.assertEqual(len(mutations), 38920)
 
@@ -232,7 +239,7 @@ def test_run_fillout_workflow2(self):
             mut.pop('Variant_Classification')
 
         hash = md5_obj(mutations)
-        expected_hash = '4b25d900ab90e0ed0b3702666ff01e94'
+        expected_hash = 'bc5f54f1057a7ba29f55d9d4aac92a01'
         self.assertEqual(hash, expected_hash)
 
 
@@ -245,38 +252,51 @@ def test_run_fillout_workflow3(self):
         This test uses full samples
         """
         self.maxDiff = None
-
+        maf1 = os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample1.Sample2.muts.maf")
+        maf4 = os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample4.Sample3.muts.maf")
         self.input = {
+            "samples": [
+                {
+                    "sample_id": "Sample1",
+                    "normal_id": "Sample1-N",
+                    "maf_file": { "class": "File", "path": maf1 }
+                },
+                {
+                    "sample_id": "Sample4",
+                    "normal_id": "Sample4-N",
+                    "maf_file": { "class": "File", "path": maf4 }
+                },
+            ],
             "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_1']['REF_FASTA']},
-            "sample_ids": ["Sample1", "Sample4"],
+            # "sample_ids": ["Sample1", "Sample4"],
             "bam_files": [
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['BAM_DIR'], "Sample1.bam") },
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['BAM_DIR'], "Sample4.bam") }
-            ],
-            "maf_files": [
-                { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample1.Sample2.muts.maf") },
-                { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample4.Sample3.muts.maf") }
             ]
+            # "maf_files": [
+            #     { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample1.Sample2.muts.maf") },
+            #     { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample4.Sample3.muts.maf") }
+            # ]
         }
 
         output_json, output_dir = self.run_cwl()
+        output_path = os.path.join(output_dir,'output.maf')
 
         expected_output = {
             'output_file': {
-                'location': 'file://' + os.path.join(output_dir,'output.maf'),
+                'location': 'file://' + output_path,
                 'basename': 'output.maf',
                 'class': 'File',
                 # 'checksum': 'sha1$2f60f58389ec65af87612c7532ad28b882fb84ba',
                 # 'size': 26238820,
-                'path':  os.path.join(output_dir,'output.maf')
+                'path':  output_path
                 }
             }
         output_json['output_file'].pop('checksum')
         output_json['output_file'].pop('size')
-        self.assertEqual(output_json, expected_output)
+        self.assertCWLDictEqual(output_json, expected_output)
 
-        output_file = output_json['output_file']['path']
-        comments, mutations = self.load_mutations(output_file)
+        comments, mutations = self.load_mutations(output_path)
 
         hash = md5_obj(mutations)
 
@@ -289,7 +309,7 @@ def test_run_fillout_workflow3(self):
             mut.pop('Variant_Classification')
 
         hash = md5_obj(mutations)
-        expected_hash = '77fb1f3aa26ddf06029232ee720a709c'
+        expected_hash = '4a03d128d76b72328b62a87814d89993'
         self.assertEqual(hash, expected_hash)
 
 

From 342e6f1f4f7a3839e579fbe96ccc8d6f7a61ac77 Mon Sep 17 00:00:00 2001
From: Stephen Kelly <stevekm@users.noreply.github.com>
Date: Wed, 26 Jan 2022 14:38:00 -0500
Subject: [PATCH 4/4] add samples input schema to
 samples_fillout_index_workflow

---
 cwl/samples_fillout_index_workflow.cwl        | 114 +++++++++++++-----
 cwl/samples_fillout_workflow.cwl              |   2 +
 ...test_samples_fillout_index_workflow_cwl.py |  45 ++++---
 tests/test_samples_fillout_workflow_cwl.py    |   5 -
 4 files changed, 114 insertions(+), 52 deletions(-)

diff --git a/cwl/samples_fillout_index_workflow.cwl b/cwl/samples_fillout_index_workflow.cwl
index 6de0f4f..c6d6102 100644
--- a/cwl/samples_fillout_index_workflow.cwl
+++ b/cwl/samples_fillout_index_workflow.cwl
@@ -18,11 +18,15 @@ requirements:
   SubworkflowFeatureRequirement: {}
 
 inputs:
-  sample_ids:
+  samples:
     type:
-        type: array
-        items: string
-
+      type: array
+      items:
+        type: record
+        fields:
+          maf_file: File
+          sample_id: string # must match sample ID used inside maf file
+          normal_id: string
   bam_files:
     type:
         type: array
@@ -30,26 +34,20 @@ inputs:
     secondaryFiles:
         - ^.bai
 
-  maf_files:
+  unindexed_samples:
     type:
       type: array
-      items: File
+      items:
+        type: record
+        fields:
+          maf_file: File
+          sample_id: string # must match sample ID used inside maf file
+          normal_id: string
 
   unindexed_bam_files:
     type:
       type: array
       items: File
-
-  unindexed_sample_ids:
-    type:
-      type: array
-      items: string
-
-  unindexed_maf_files:
-    type:
-      type: array
-      items: File
-
   ref_fasta:
     type: File
     secondaryFiles:
@@ -60,8 +58,8 @@ inputs:
       - .sa
       - .fai
       - ^.dict
-
   exac_filter: # need this to resolve error in subworkflow: Anonymous file object must have 'contents' and 'basename' fields.
+  # TODO: the exac_filter file also needs its .tbi/.csi index file added!!
     type: File
     default:
       class: File
@@ -85,41 +83,95 @@ steps:
     run: index_bam.cwl
     in:
       bam: unindexed_bam_files
-    scatter: [ bam ]
-    scatterMethod: dotproduct
+    scatter: bam
     out: [ bam_indexed ]
 
   # run filter script to apply cBioPortal filters on variants for fillout
   run_maf_filter:
     run: maf_filter.cwl
     in:
-      maf_file: maf_files
+      sample: samples
+      maf_file:
+        valueFrom: ${ return inputs.sample['maf_file']; }
       is_impact: is_impact
       argos_version_string: argos_version_string
-    scatter: [ maf_file ]
-    scatterMethod: dotproduct
+    scatter: sample
     out: [ cbio_mutation_data_file ]
 
-  # run the actual fillout workflow
+  run_maf_filter_unindexed:
+    run: maf_filter.cwl
+    in:
+      sample: unindexed_samples
+      maf_file:
+        valueFrom: ${ return inputs.sample['maf_file']; }
+      is_impact: is_impact
+      argos_version_string: argos_version_string
+    scatter: sample
+    out: [ cbio_mutation_data_file ]
+
+  # update the samples to use the new filtered maf files and output a single list of samples
+  merge_samples_replace_mafs:
+    in:
+      samples:
+        source: [ samples, unindexed_samples ]
+        linkMerge: merge_flattened
+      maf_files:
+        source: [ run_maf_filter/cbio_mutation_data_file, run_maf_filter_unindexed/cbio_mutation_data_file ]
+        linkMerge: merge_flattened
+    out: [ samples ]
+    run:
+      class: ExpressionTool
+      inputs:
+        samples:
+          type:
+            type: array
+            items:
+              type: record
+              fields:
+                maf_file: File
+                sample_id: string
+                normal_id: string
+        maf_files: File[]
+      outputs:
+        samples:
+          type:
+            type: array
+            items:
+              type: record
+              fields:
+                maf_file: File
+                sample_id: string
+                normal_id: string
+      # NOTE: in the line below `var i in inputs.samples`, `i` is the index position in the array `inputs.samples` (JavaScript `for...in` yields it as a string key such as '0'), not the array element itself
+      # the closest Python equivalent would look like ` x = ['a', 'b']; for i in range(len(x)): print(i, x[i]) `
+      expression: "${
+        var new_samples = [];
+
+        for ( var i in inputs.samples ){
+            new_samples.push({
+              'sample_id': inputs.samples[i]['sample_id'],
+              'normal_id': inputs.samples[i]['normal_id'],
+              'maf_file': inputs.maf_files[i]
+            });
+          };
+
+        return {'samples': new_samples};
+        }"
+
+  # run the fillout workflow
   run_samples_fillout:
     run: samples_fillout_workflow.cwl
     in:
       output_fname: fillout_output_fname
       exac_filter: exac_filter
-      sample_ids:
-        source: [ sample_ids, unindexed_sample_ids ]
-        linkMerge: merge_flattened
+      samples: merge_samples_replace_mafs/samples
       bam_files:
         source: [ bam_files, run_indexer/bam_indexed ]
         linkMerge: merge_flattened
-      maf_files:
-        source: [ run_maf_filter/cbio_mutation_data_file, unindexed_maf_files ]
-        linkMerge: merge_flattened
       ref_fasta: ref_fasta
     out: [ output_file ]
 
 outputs:
-
   output_file:
     type: File
     outputSource: run_samples_fillout/output_file
diff --git a/cwl/samples_fillout_workflow.cwl b/cwl/samples_fillout_workflow.cwl
index 7793d8b..69c6520 100644
--- a/cwl/samples_fillout_workflow.cwl
+++ b/cwl/samples_fillout_workflow.cwl
@@ -71,6 +71,8 @@ steps:
                 sample_id: string
       outputs:
         sample_ids: string[]
+      # NOTE: in the line below `var i in inputs.samples`, `i` is the index position in the array `inputs.samples` (JavaScript `for...in` yields it as a string key such as '0'), not the array element itself
+      # the closest Python equivalent would look like ` x = ['a', 'b']; for i in range(len(x)): print(i, x[i]) `
       expression: "${
         var sample_ids = [];
         for ( var i in inputs.samples ){
diff --git a/tests/test_samples_fillout_index_workflow_cwl.py b/tests/test_samples_fillout_index_workflow_cwl.py
index 26761f9..437e00c 100644
--- a/tests/test_samples_fillout_index_workflow_cwl.py
+++ b/tests/test_samples_fillout_index_workflow_cwl.py
@@ -32,40 +32,54 @@ def test_run_fillout_workflow(self):
         """
         Test case for running the fillout workflow on a number of samples, each with a bam and maf
         """
+        # self.preserve = True
         self.maxDiff = None
         self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs
+        # self.runner_args['debug'] = True
+        # self.runner_args['js_console'] = True
 
         self.input = {
-            "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']},
-            "sample_ids": ["Sample1"],
+            "samples": [
+                {
+                    "sample_id": "Sample1",
+                    "normal_id": "Sample1-N",
+                    "maf_file": { "class": "File", "path": self.sample1_maf }
+                }
+            ],
             "bam_files": [
                 { "class": "File", "path": self.sample1_bam }
             ],
-            "maf_files": [
-                { "class": "File", "path": self.sample1_maf }
+            "unindexed_samples": [
+                {
+                    "sample_id": "Sample4",
+                    "normal_id": "DMP-MATCHED-NORMAL",
+                    "maf_file": { "class": "File", "path": self.sample4_maf }
+                },
+                {
+                    "sample_id": "Sample24",
+                    "normal_id": "DMP-UNMATCHED-NORMAL",
+                    "maf_file": { "class": "File", "path": self.sample24_maf }
+                },
             ],
-            "unindexed_sample_ids": ["Sample4", "Sample24"],
             "unindexed_bam_files": [
                 { "class": "File", "path": self.sample4_bam },
                 { "class": "File", "path": self.sample24_bam }
             ],
-            "unindexed_maf_files": [
-                { "class": "File", "path": self.sample4_maf },
-                { "class": "File", "path": self.sample24_maf }
-            ],
-            "fillout_output_fname": 'output.maf'
+            "fillout_output_fname": 'output.maf',
+            "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']},
         }
 
         output_json, output_dir = self.run_cwl()
+        output_file = os.path.join(output_dir,'output.maf')
 
         expected_output = {
             'output_file': {
-                'location': 'file://' + os.path.join(output_dir,'output.maf'),
+                'location': 'file://' + output_file,
                 'basename': 'output.maf',
                 'class': 'File',
                 # 'checksum': 'sha1$d8d63a0aca2da20d2d15e26fcf64fd6295eda05e',
                 # 'size': 25838928,
-                'path':  os.path.join(output_dir,'output.maf')
+                'path':  output_file
                 }
             }
         # NOTE: for some reason, this file keeps coming out with different annotations for 'splice_acceptor_variant' or `splice_donor_variant`
@@ -74,10 +88,9 @@ def test_run_fillout_workflow(self):
         output_json['output_file'].pop('size')
         self.assertCWLDictEqual(output_json, expected_output)
 
-        output_file = output_json['output_file']['path']
+        # instead of checksum and size, count the number of mutations and take a checksum on the mutation contents
         comments, mutations = self.load_mutations(output_file)
-
-        self.assertEqual(len(mutations), 23742)
+        self.assertEqual(len(mutations), 117)
 
         # Need to remove these fields because they are inconsistent on the output maf file;
         for mut in mutations:
@@ -86,7 +99,7 @@ def test_run_fillout_workflow(self):
             mut.pop('Variant_Classification')
 
         hash = md5_obj(mutations)
-        expected_hash = 'f153c68bc79a6f28e261ec04f51b2111'
+        expected_hash = '01af6b281f70e6821addce80a2ec5cf8'
         self.assertEqual(hash, expected_hash)
 
 
diff --git a/tests/test_samples_fillout_workflow_cwl.py b/tests/test_samples_fillout_workflow_cwl.py
index 3edbd1d..5a56c61 100644
--- a/tests/test_samples_fillout_workflow_cwl.py
+++ b/tests/test_samples_fillout_workflow_cwl.py
@@ -268,15 +268,10 @@ def test_run_fillout_workflow3(self):
                 },
             ],
             "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_1']['REF_FASTA']},
-            # "sample_ids": ["Sample1", "Sample4"],
             "bam_files": [
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['BAM_DIR'], "Sample1.bam") },
                 { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['BAM_DIR'], "Sample4.bam") }
             ]
-            # "maf_files": [
-            #     { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample1.Sample2.muts.maf") },
-            #     { "class": "File", "path": os.path.join(self.DATA_SETS['Proj_1']['MAF_DIR'], "Sample4.Sample3.muts.maf") }
-            # ]
         }
 
         output_json, output_dir = self.run_cwl()