mskcc · stevekm · Jul 15, 2021
diff --git a/cwl/igv-report_maf_workflow.cwl b/cwl/igv-report_maf_workflow.cwl
@@ -1,10 +1,9 @@
 #!/usr/bin/env cwl-runner
 
-# NOTE: Important! Need  cwlVersion: v1.1 for the array record fields secondaryFiles to work here
-cwlVersion: v1.1
+cwlVersion: v1.0
 class: Workflow
 doc: "
-Workflow to run GetBaseCountsMultiSample fillout on a number of samples, each with their own bam and maf files
+Workflow to generate an IGV report from a sites maf file, a list of maf files, and a list of bam files
 "
 requirements:
   MultipleInputFeatureRequirement: {}
@@ -14,6 +13,9 @@ requirements:
   SubworkflowFeatureRequirement: {}
 
 inputs:
+  report_filename:
+    type: string # name of the output report file
+    default: "igv.html"
   sites_maf: File # single input maf file that will be used as the regions to use in IGV
   maf_files: File[] # list of all maf files to list as tracks in IGV
   bam_files:
@@ -34,6 +36,7 @@ inputs:
 
 steps:
   # need to convert the sites maf to vcf format since that is how we have the igv-report.cwl configured; this could be .bed in the future
+  # TODO: need to update our igv-reports version to the new one that supports .maf input files!
   convert_sites_to_vcf:
     run: maf2vcf_gz_workflow.cwl
     in:
@@ -60,6 +63,7 @@ steps:
       vcf_gz_files: convert_mafs_to_vcf/output_file
       bam_files: bam_files
       ref_fasta: ref_fasta
+      output_filename: report_filename
     out:
       [ output_file ]
 

diff --git a/tests/test_igv_report_maf_workflow.py b/tests/test_igv_report_maf_workflow.py
@@ -2,6 +2,9 @@
 # -*- coding: utf-8 -*-
 """
 Test case for the igv-report_maf_workflow cwl
+
+NOTE: for some reason, some of the variants created in the setUp method here are dropped by the workflow during the conversion to vcf
+TODO: find a better way to convert maf to vcf, or just use maf with newer version of igv-reports
 """
 import os
 import sys
@@ -42,7 +45,7 @@ def setUp(self):
         ('n_ref_count', '212'),
         ('Tumor_Sample_Barcode', '.')
         ])
-        self.maf_row2 = OrderedDict([
+        self.maf_row2 = OrderedDict([ # NOTE: This one is getting skipped in output
         ('Hugo_Symbol', 'FAM46C'),
         ('Entrez_Gene_Id', '54855'),
         ('Center', 'mskcc.org'),
@@ -61,7 +64,7 @@ def setUp(self):
         ('n_ref_count', '212'),
         ('Tumor_Sample_Barcode', '.')
         ])
-        self.maf_row3 = OrderedDict([
+        self.maf_row3 = OrderedDict([ # NOTE: This one is getting skipped in output
         ('Hugo_Symbol', 'IL7R'),
         ('Entrez_Gene_Id', '3575'),
         ('Center', 'mskcc.org'),
@@ -112,6 +115,46 @@ def setUp(self):
         lines3 = self.dicts2lines(rows3, comment_list = self.comments)
         self.maf3 = self.write_table(tmpdir = self.tmpdir, filename = "3.maf", lines = lines3)
 
+    def test_igv_report_demo1(self):
+        """
+        Run the workflow on the small 'demo' bam files with a custom report filename, to simulate how files would be passed in from the main workflow
+        NOTE: the maf file used should contain variants for which the bams have reads, otherwise no track may appear
+        """
+        maf_file = os.path.join(self.DATA_SETS['demo']['MAF_DIR'], "Sample1.Sample2.muts.maf")
+        normal_bam = os.path.join(self.DATA_SETS['demo']['BAM_DIR'], "Sample2.bam")
+        tumor_bam = os.path.join(self.DATA_SETS['demo']['BAM_DIR'], "Sample1.bam")
+        report_filename = "Sample1.Sample2.igv.html"
+
+        self.input = {
+            # the same single maf file is used for both the sites and the tracks, to simulate how a tumor/normal pair would be handled
+            "sites_maf": {"class": "File", "path": maf_file},
+            "maf_files": [
+                {"class": "File", "path": maf_file}
+            ],
+            "bam_files":[
+                { "class": "File", "path": tumor_bam },
+                { "class": "File", "path": normal_bam }
+            ],
+            "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']},
+            "report_filename": report_filename
+        }
+
+        output_json, output_dir = self.run_cwl()
+
+        # drop size and checksum before comparing; NOTE(review): the original comment blamed '.gz' inconsistency, but the output here is an .html report - confirm the real reason
+        output_json['output_file'].pop('checksum')
+        output_json['output_file'].pop('size')
+        expected_output = {
+            'output_file': {
+                'location': 'file://' + os.path.join(output_dir, report_filename),
+                'basename': report_filename,
+                'class': 'File',
+                # 'checksum': 'sha1$bd56f50dc16e873707dc37b2a0ae96d36258ec1f',
+                # 'size': 2596168,
+                'path': os.path.join(output_dir, report_filename) }}
+
+        self.assertEqual(output_json, expected_output)
+
     def test_igv_report(self):
         """
         Make an IGV report from a maf, a list of mafs, and bams