Skip to content

Commit

Permalink
refactor test cases for tests/test_samples_fillout_index_batch_workfl…
Browse files Browse the repository at this point in the history
…ow_cwl.py to deprecate some old tests and make a new test that covers all the required edge cases
  • Loading branch information
stevekm committed Jan 20, 2023
1 parent 5ad33cc commit 991da3f
Showing 1 changed file with 222 additions and 36 deletions.
258 changes: 222 additions & 36 deletions tests/test_samples_fillout_index_batch_workflow_cwl.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,15 @@
sample5_bam = os.path.join(DATA_SETS['Fillout01']['BAM_DIR'], 'Sample5.UnitTest01.bam')


class TestSamplesFilloutIndexBatch1Group(PlutoPreRunTestCase):
# # # # # # # # # # #
# # # # # # # # # # #
# Test setup


class TestSamplesFilloutIndexBatch(PlutoPreRunTestCase):
"""
Three sample groups,
One group has a singleton,
Some samples are clinical,
Some samples lack maf
"""

cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

Expand All @@ -54,9 +59,8 @@ def setUp(self):
self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs

def setUpRun(self):
"""
Run the workflow and return the results; output accessible under self.res.output in downstream 'test_' methods
"""

# research + clinical sample group
sample_group1 = [
{
"sample_id": "Sample1",
Expand All @@ -69,36 +73,55 @@ def setUpRun(self):
{
"sample_id": "Sample2",
"normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
"sample_type": "research",
"sample_type": "clinical",
"prefilter": True,
"maf_file": { "class": "File", "path": sample2_maf },
"bam_file": { "class": "File", "path": sample2_bam }
},
]

# research + clinical sample group; no maf
sample_group2 = [
{
"sample_id": "Sample3",
"normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
"sample_type": "clinical",
"sample_type": "research",
"prefilter": False,
"maf_file": { "class": "File", "path": sample3_maf },
"bam_file": { "class": "File", "path": sample3_bam }
},
{
"sample_id": "Sample4",
"normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
"sample_type": "clinical",
"prefilter": False,
# "maf_file": { "class": "File", "path": sample4_maf },
"bam_file": { "class": "File", "path": sample4_bam }
}
]

# singleton sample group
sample_group3 = [
{
"sample_id": "Sample5",
"normal_id": "FROZENPOOLEDNORMAL_IMPACT505_V2",
"sample_type": "research",
"prefilter": True,
"maf_file": { "class": "File", "path": sample5_maf },
"bam_file": { "class": "File", "path": sample5_bam }
},
]

self.input = {
"sample_groups": [sample_group1],
"sample_groups": [sample_group1, sample_group2, sample_group3],
"fillout_output_fname": 'output.maf',
"ref_fasta": {"class": "File", "path": DATA_SETS['Proj_08390_G']['REF_FASTA']},
}

output_json, output_dir = self.run_cwl()

return(output_json, output_dir)

def getExpected(self, output_dir):
"""
Return the expected CWL workflow output with the tmpdir output dir path included
Accessible in downstream 'test_' methods under self.res.expected
"""
return({
'output_file': OFile(name = 'output.maf', dir = output_dir),
'filtered_file': OFile(name = 'output.filtered.maf', dir = output_dir),
Expand Down Expand Up @@ -126,29 +149,28 @@ def test_CWLDictEqual(self):
related_keys = strip_related_keys)

def test_output_file_num_muts(self):
self.assertNumMutations(OFile.init_dict(self.res.output['output_file']).path, 147)
self.assertNumMutations(OFile.init_dict(self.res.output['output_file']).path, 118)

def test_output_file_muts_hash(self):
self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "4732e626d2859e4c2e8a7d4eeca0e0f4")

self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "828804208213b258565ca5612a4bc5e0")

def test_filtered_file_num_muts(self):
self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 96)
self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 118)

def test_filtered_file_muts_hash(self):
self.assertMutationsHash(OFile.init_dict(self.res.output['filtered_file']).path, "f934e6bd6f1767372b9737d3865e9f0b")
self.assertMutationsHash(OFile.init_dict(self.res.output['filtered_file']).path, "53de1a8800f5e86979512bbc8baf88b0")

def test_portal_file_num_muts(self):
self.assertNumMutations(OFile.init_dict(self.res.output['portal_file']).path, 70)
self.assertNumMutations(OFile.init_dict(self.res.output['portal_file']).path, 105)

def test_portal_file_muts_hash(self):
self.assertMutationsHash(OFile.init_dict(self.res.output['portal_file']).path, "10f4469d0128b6e0bf9e1ef315feb08c")
self.assertMutationsHash(OFile.init_dict(self.res.output['portal_file']).path, "95fb5bc50730548f8005b1db71a22b65")

def test_uncalled_file_num_muts(self):
self.assertNumMutations(OFile.init_dict(self.res.output['uncalled_file']).path, 26)
self.assertNumMutations(OFile.init_dict(self.res.output['uncalled_file']).path, 13)

def test_uncalled_file_muts_hash(self):
self.assertMutationsHash(OFile.init_dict(self.res.output['uncalled_file']).path, "f996e92adc6d1fecb946533a9f23ae99")
self.assertMutationsHash(OFile.init_dict(self.res.output['uncalled_file']).path, "559aedb3b03d0eb5a637cb789e80f635")

def test_portal_output_path_num_muts(self):
self.assertEqualNumMutations([
Expand All @@ -160,7 +182,7 @@ def test_portal_output_path_num_muts(self):
def test_output_file_fields(self):
self.assertMutFieldContains(
OFile.init_dict(self.res.output['output_file']).path,
"Tumor_Sample_Barcode", ["Sample1", "Sample2", "Sample3"], containsAll = True)
"Tumor_Sample_Barcode", ["Sample1", "Sample2", "Sample3", "Sample4", "Sample5"], containsAll = True)

def test_portal_output_path_fields(self):
self.assertMutFieldDoesntContain(
Expand All @@ -173,7 +195,14 @@ def test_uncalled_output_path_fields(self):
"Amino_Acid_Change", [""])




class TestSamplesFilloutIndexBatch2Group0(PlutoPreRunTestCase):
"""
Test case for two sample groups,
one sample missing a maf file,
one sample group has only one singleton
"""

cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

Expand Down Expand Up @@ -295,7 +324,161 @@ def test_uncalled_output_path_fields(self):
"Amino_Acid_Change", [""])


class TestSamplesFilloutIndexBatch2Group2(PlutoPreRunTestCase):











# These are old test cases that we don't need to run with the test suite, but we should hold on to them for a bit
class DontRun____TestSamplesFilloutIndexBatch1Group(PlutoPreRunTestCase):
    """
    Retired test case: a single sample group of three samples.

    The code is kept here for reference only; the class is flagged with
    ``__test__ = False`` so that it is left out of test collection.
    """
    # opt this class out of test collection
    # https://docs.pytest.org/en/7.1.x/example/pythoncollection.html#customizing-test-collection
    __test__ = False

    # # # # # # # # # # #
    # # # # # # # # # # #
    # Test setup

    cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

    def setUp(self):
        """Run the parent setUp, then switch off the runner cache."""
        super().setUp()
        # the cache must stay off for the samples fillout workflow;
        # it breaks on split_vcf_to_mafs
        self.runner_args['use_cache'] = False

    def setUpRun(self):
        """
        Build the workflow input for one sample group and run the CWL.

        Returns the (output_json, output_dir) pair produced by self.run_cwl();
        the output is accessible under self.res.output in downstream 'test_' methods.
        """
        normal_id = "FROZENPOOLEDNORMAL_IMPACT505_V2"

        # one row per sample: (sample_id, sample_type, prefilter, maf path, bam path)
        sample_rows = (
            ("Sample1", "research", True, sample1_maf, sample1_bam),
            ("Sample2", "research", True, sample2_maf, sample2_bam),
            ("Sample3", "clinical", False, sample3_maf, sample3_bam),
        )
        sample_group1 = [
            {
                "sample_id": sample_id,
                "normal_id": normal_id,
                "sample_type": sample_type,
                "prefilter": prefilter,
                "maf_file": {"class": "File", "path": maf_path},
                "bam_file": {"class": "File", "path": bam_path},
            }
            for sample_id, sample_type, prefilter, maf_path, bam_path in sample_rows
        ]

        ref_fasta_path = DATA_SETS['Proj_08390_G']['REF_FASTA']
        self.input = {
            "sample_groups": [sample_group1],
            "fillout_output_fname": 'output.maf',
            "ref_fasta": {"class": "File", "path": ref_fasta_path},
        }

        output_json, output_dir = self.run_cwl()
        return output_json, output_dir

    def getExpected(self, output_dir):
        """
        Build the expected CWL workflow output, rooted at the tmpdir output dir path.

        The result is accessible in downstream 'test_' methods under self.res.expected.
        """
        # output key -> file name expected inside output_dir
        expected_names = (
            ('output_file', 'output.maf'),
            ('filtered_file', 'output.filtered.maf'),
            ('portal_file', 'data_mutations_extended.txt'),
            ('uncalled_file', 'data_mutations_uncalled.txt'),
        )
        return {
            key: OFile(name=fname, dir=output_dir)
            for key, fname in expected_names
        }

    # # # # # # # # # # #
    # # # # # # # # # # #

    def test_CWLDictEqual(self):
        """
        Compare the workflow output dict with the expected dict for a run on
        a number of samples, each with a bam and maf
        """
        # file contents are inconsistent, so size and checksum are stripped
        # from each output file entry before comparing
        basenames = (
            'output.maf',
            'output.filtered.maf',
            'data_mutations_extended.txt',
            'data_mutations_uncalled.txt',
        )
        strip_related_keys = [
            ('basename', basename, ['size', 'checksum'])
            for basename in basenames
        ]
        self.assertCWLDictEqual(
            self.res.output,
            self.res.expected,
            related_keys=strip_related_keys,
        )

    def test_output_file_num_muts(self):
        output_maf = OFile.init_dict(self.res.output['output_file'])
        self.assertNumMutations(output_maf.path, 147)

    def test_output_file_muts_hash(self):
        output_maf = OFile.init_dict(self.res.output['output_file'])
        self.assertMutationsHash(output_maf.path, "4732e626d2859e4c2e8a7d4eeca0e0f4")

    def test_filtered_file_num_muts(self):
        filtered_maf = OFile.init_dict(self.res.output['filtered_file'])
        self.assertNumMutations(filtered_maf.path, 96)

    def test_filtered_file_muts_hash(self):
        filtered_maf = OFile.init_dict(self.res.output['filtered_file'])
        self.assertMutationsHash(filtered_maf.path, "f934e6bd6f1767372b9737d3865e9f0b")

    def test_portal_file_num_muts(self):
        portal_maf = OFile.init_dict(self.res.output['portal_file'])
        self.assertNumMutations(portal_maf.path, 70)

    def test_portal_file_muts_hash(self):
        portal_maf = OFile.init_dict(self.res.output['portal_file'])
        self.assertMutationsHash(portal_maf.path, "10f4469d0128b6e0bf9e1ef315feb08c")

    def test_uncalled_file_num_muts(self):
        uncalled_maf = OFile.init_dict(self.res.output['uncalled_file'])
        self.assertNumMutations(uncalled_maf.path, 26)

    def test_uncalled_file_muts_hash(self):
        uncalled_maf = OFile.init_dict(self.res.output['uncalled_file'])
        self.assertMutationsHash(uncalled_maf.path, "f996e92adc6d1fecb946533a9f23ae99")

    def test_portal_output_path_num_muts(self):
        # the portal and uncalled files are checked together against the filtered file
        portal_path = OFile.init_dict(self.res.output['portal_file']).path
        uncalled_path = OFile.init_dict(self.res.output['uncalled_file']).path
        filtered_path = OFile.init_dict(self.res.output['filtered_file']).path
        self.assertEqualNumMutations([portal_path, uncalled_path], filtered_path)

    def test_output_file_fields(self):
        output_path = OFile.init_dict(self.res.output['output_file']).path
        sample_ids = ["Sample1", "Sample2", "Sample3"]
        self.assertMutFieldContains(
            output_path, "Tumor_Sample_Barcode", sample_ids, containsAll=True)

    def test_portal_output_path_fields(self):
        portal_path = OFile.init_dict(self.res.output['portal_file']).path
        self.assertMutFieldDoesntContain(portal_path, "Amino_Acid_Change", [""])

    def test_uncalled_output_path_fields(self):
        uncalled_path = OFile.init_dict(self.res.output['uncalled_file']).path
        self.assertMutFieldDoesntContain(uncalled_path, "Amino_Acid_Change", [""])
class DontRun____TestSamplesFilloutIndexBatch2Group2(PlutoPreRunTestCase):
"""
Two sample groups
Skip running this test since its covered by the other tests but leave the code here for now
"""
__test__ = False

cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

Expand Down Expand Up @@ -432,9 +615,12 @@ def test_uncalled_output_path_fields(self):
self.assertMutFieldDoesntContain(
OFile.init_dict(self.res.output['uncalled_file']).path,
"Amino_Acid_Change", [""])


class TestSamplesFilloutIndexBatch3Group(PlutoPreRunTestCase):
class DontRun____TestSamplesFilloutIndexBatch3Group(PlutoPreRunTestCase):
"""
Three sample groups,
one group contains a singleton
"""
__test__ = False

cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

Expand Down Expand Up @@ -575,9 +761,13 @@ def test_uncalled_output_path_fields(self):
self.assertMutFieldDoesntContain(
OFile.init_dict(self.res.output['uncalled_file']).path,
"Amino_Acid_Change", [""])


class TestSamplesFilloutIndexBatch4Group(PlutoPreRunTestCase):
class DontRun____TestSamplesFilloutIndexBatch4Group(PlutoPreRunTestCase):
"""
Four sample groups,
Two groups have singletons,
One singleton is clinical sample
"""
__test__ = False

cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl')

Expand Down Expand Up @@ -720,7 +910,3 @@ def test_uncalled_output_path_fields(self):
OFile.init_dict(self.res.output['uncalled_file']).path,
"Amino_Acid_Change", [""])





0 comments on commit 991da3f

Please sign in to comment.