diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..71ff580 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath =tests pluto diff --git a/tests/datasets.py b/tests/datasets.py new file mode 100644 index 0000000..da54346 --- /dev/null +++ b/tests/datasets.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python +""" +""" +import os + +THIS_DIR = os.path.dirname(os.path.abspath(__file__)) +REF_DIR = os.path.join(os.path.dirname(THIS_DIR), "ref") # ../ref + +FIXTURES_DIR = os.environ.get('FIXTURES_DIR', '/juno/work/ci/helix_filters_01/fixtures') +FACETS_SNPS_VCF = os.environ.get('FACETS_SNPS_FILE', '/juno/work/ci/resources/genomes/GRCh37/facets_snps/dbsnp_137.b37__RmDupsClean__plusPseudo50__DROP_SORT.vcf') +KNOWN_FUSIONS_FILE = os.path.join(REF_DIR, "known_fusions_at_mskcc.txt") +IMPACT_FILE=os.environ.get('IMPACT_file', '/work/ci/helix_filters_01/reference_data/gene_lists/all_IMPACT_genes.tsv') +CONPAIR_MARKERS_BED = os.environ.get("CONPAIR_MARKERS_BED", "/juno/work/ci/helix_filters_01/reference_data/concordance/markers/IMPACT468/FP_tiling_genotypes_for_Conpair.bed") +CONPAIR_MARKERS_TXT = os.environ.get("CONPAIR_MARKERS_TXT", "/juno/work/ci/helix_filters_01/reference_data/concordance/markers/IMPACT468/FP_tiling_genotypes_for_Conpair.txt") + +ARGOS_VERSION_STRING = os.environ.get('ARGOS_VERSION_STRING', '2.x') # TODO: deprecate this +IS_IMPACT = os.environ.get('IS_IMPACT', "True") # TODO: deprecate this +PORTAL_FILE = os.environ.get('PORTAL_FILE', 'data_mutations_extended.txt') # TODO: deprecate this +PORTAL_CNA_FILE = os.environ.get('PORTAL_CNA_FILE', 'data_CNA.txt') # TODO: deprecate this + +REF_FASTA = os.environ.get('REF_FASTA', '/juno/work/ci/resources/genomes/GRCh37/fasta/b37.fasta') +MICROSATELLITES_LIST = os.environ.get("MICROSATELLITES_LIST", "/work/ci/resources/request_files/msisensor/microsatellites.list") +# $ md5sum /work/ci/resources/request_files/msisensor/microsatellites.list +# dc982a3bfe1e33b201b99a8ebf3acd61 /work/ci/resources/request_files/msisensor/microsatellites.list +# $ wc -l /work/ci/resources/request_files/msisensor/microsatellites.list +# 33422661 /work/ci/resources/request_files/msisensor/microsatellites.list + + + +DATA_SETS = { + "Proj_08390_G": { # full sample Argos output + "DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G"), + "MAF_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "maf"), + "BAM_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "bam"), + # "SNP_PILEUP_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "snp_pileup"), + "FACETS_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "facets"), + "FACETS_SUITE_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "facets-suite"), + "INPUTS_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "inputs"), + "QC_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "qc"), + "targets_list": "/juno/work/ci/resources/roslin_resources/targets/HemePACT_v4/b37/HemePACT_v4_b37_targets.ilist", + "analyst_file": "Proj_08390_G.muts.maf", # TODO: deprecate this + "analysis_gene_cna_file": "Proj_08390_G.gene.cna.txt", # TODO: deprecate this + "MAF_FILTER_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "maf_filter"), + "SNP_PILEUP_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "snp-pileup"), + 'REF_FASTA': REF_FASTA, + 'microsatellites_file': MICROSATELLITES_LIST, + "MSI_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "msi"), + "TMB_DIR": os.path.join(FIXTURES_DIR, "Proj_08390_G", "tmb"), + }, + "Proj_1": { # same as Proj_08390_G but both filenames and file contents have been scrubbed; results in different file md5's + "MAF_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "maf"), + "BAM_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "bam"), + "FACETS_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "facets"), + "FACETS_SUITE_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "facets-suite"), + "QC_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "qc"), + "INPUTS_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "inputs"), + 'REF_FASTA': REF_FASTA, + "targets_list": "/juno/work/ci/resources/roslin_resources/targets/HemePACT_v4/b37/HemePACT_v4_b37_targets.ilist", + "MSI_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "msi"), + "TMB_DIR": os.path.join(FIXTURES_DIR, "Proj_1", "tmb"), + }, + "demo":{ # small subset of samples on a full project + "DIR": os.path.join(FIXTURES_DIR, "demo"), + "MAF_DIR": os.path.join(FIXTURES_DIR, "demo", "maf"), + "BAM_DIR": os.path.join(FIXTURES_DIR, "demo", "bam"), + "QC_DIR": os.path.join(FIXTURES_DIR, "demo", "qc"), + "INPUTS_DIR": os.path.join(FIXTURES_DIR, "demo", "inputs"), + "SNP_PILEUP_DIR": os.path.join(FIXTURES_DIR, "demo", "snp-pileup"), + "FACETS_DIR": os.path.join(FIXTURES_DIR, "demo", "facets"), + "targets_list": "/juno/work/ci/resources/roslin_resources/targets/HemePACT_v4/b37/HemePACT_v4_b37_targets.ilist", + 'microsatellites_file': os.path.join(FIXTURES_DIR, "demo", "microsatellites", 'microsatellites.head500000.list'), + # $ md5sum microsatellites.head500000.list + # aa0126e6a916ec82a2837989458918b3 microsatellites.head500000.list + 'REF_FASTA': REF_FASTA + }, + # dataset selected for use with fillout since it has pooled normals + "07618_AG": { + "DIR": os.path.join(FIXTURES_DIR, "07618_AG"), + "BAM_DIR": os.path.join(FIXTURES_DIR, "07618_AG", "bam"), + "MAF_DIR": os.path.join(FIXTURES_DIR, "07618_AG", "maf") + }, + "Fillout01": { + "DIR": os.path.join(FIXTURES_DIR, "Fillout01"), + "BAM_DIR": os.path.join(FIXTURES_DIR, "Fillout01", "bam"), + "MAF_DIR": os.path.join(FIXTURES_DIR, "Fillout01", "maf"), + "VCF_DIR": os.path.join(FIXTURES_DIR, "Fillout01", "vcf"), + "OUTPUT_DIR": os.path.join(FIXTURES_DIR, "Fillout01", "output") + }, + "Conpair_1": { + "BAM_DIR": os.path.join(FIXTURES_DIR, "Conpair_1", "bam"), + "LIKELIHOODS": os.path.join(FIXTURES_DIR, "Conpair_1", "likelihoods"), + } +} diff --git a/tests/test_samples_fillout_index_batch_workflow_cwl.py b/tests/test_samples_fillout_index_batch_workflow_cwl.py index 0d097e4..649bc48 100644 --- a/tests/test_samples_fillout_index_batch_workflow_cwl.py +++ b/tests/test_samples_fillout_index_batch_workflow_cwl.py @@ -3,24 +3,22 @@ """ Test case for the samples_fillout_index_batch_workflow cwl - -$ CWL_ENGINE=Toil PRINT_TESTNAME=T python3 tests/test_samples_fillout_index_batch_workflow_cwl.py TestSamplesFilloutIndexBatch.test_three_groups +example command: +$ CWL_ENGINE=Toil PRINT_COMMAND=T KEEP_TMP=T pytest -n 8 -s tests/test_samples_fillout_index_batch_workflow_cwl.py """ import os import sys -import unittest from typing import Dict, Tuple - -PARENT_DIR = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) -sys.path.insert(0, PARENT_DIR) +from datasets import ( + DATA_SETS, +) from pluto import ( CWLFile, PlutoTestCase, PlutoPreRunTestCase, - DATA_SETS, OFile ) -sys.path.pop(0) + sample1_maf = os.path.join(DATA_SETS['Fillout01']['MAF_DIR'], 'Sample1.FillOutUnitTest01.muts.maf') @@ -37,14 +35,20 @@ class TestSamplesFilloutIndexBatch1Group(PlutoPreRunTestCase): - + # # # # # # # # # # # + # # # # # # # # # # # + # Test setup + cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl') - + def setUp(self): super().setUp() self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs def setUpRun(self): + """ + Run the workflow and return the results; output accessible under self.res.output in downstream 'test_' methods + """ sample_group1 = [ { "sample_id": "Sample1", @@ -75,21 +79,25 @@ def setUpRun(self): self.input = { "sample_groups": [sample_group1], "fillout_output_fname": 'output.maf', - "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']}, + "ref_fasta": {"class": "File", "path": DATA_SETS['Proj_08390_G']['REF_FASTA']}, } output_json, output_dir = self.run_cwl() return(output_json, output_dir) - + def getExpected(self, output_dir): + """ + Return the expected CWL workflow output with the tmpdir output dir path included + Accessible in downstream 'test_' methods under self.res.expected + """ return({ 'output_file': OFile(name = 'output.maf', dir = output_dir), 'filtered_file': OFile(name = 'output.filtered.maf', dir = output_dir), 'portal_file': OFile(name = 'data_mutations_extended.txt', dir = output_dir), 'uncalled_file': OFile(name = 'data_mutations_uncalled.txt', dir = output_dir), }) - + # # # # # # # # # # # # # # # # # # # # # # @@ -105,8 +113,8 @@ def test_CWLDictEqual(self): ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual( - self.res.output, - self.res.expected, + self.res.output, + self.res.expected, related_keys = strip_related_keys) def test_output_file_num_muts(self): @@ -114,7 +122,7 @@ def test_output_file_num_muts(self): def test_output_file_muts_hash(self): self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "4732e626d2859e4c2e8a7d4eeca0e0f4") - + def test_filtered_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 96) @@ -136,9 +144,9 @@ def test_uncalled_file_muts_hash(self): def test_portal_output_path_num_muts(self): self.assertEqualNumMutations([ - OFile.init_dict(self.res.output['portal_file']).path, - OFile.init_dict(self.res.output['uncalled_file']).path, - ], + OFile.init_dict(self.res.output['portal_file']).path, + OFile.init_dict(self.res.output['uncalled_file']).path, + ], OFile.init_dict(self.res.output['filtered_file']).path) def test_output_file_fields(self): @@ -158,9 +166,9 @@ def test_uncalled_output_path_fields(self): class TestSamplesFilloutIndexBatch2Group(PlutoPreRunTestCase): - + cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl') - + def setUp(self): super().setUp() self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs @@ -197,13 +205,13 @@ def setUpRun(self): self.input = { "sample_groups": [sample_group1, sample_group2], "fillout_output_fname": 'output.maf', - "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']}, + "ref_fasta": {"class": "File", "path": DATA_SETS['Proj_08390_G']['REF_FASTA']}, } output_json, output_dir = self.run_cwl() return(output_json, output_dir) - + def getExpected(self, output_dir): return({ 'output_file': OFile(name = 'output.maf', dir = output_dir), @@ -211,10 +219,10 @@ def getExpected(self, output_dir): 'portal_file': OFile(name = 'data_mutations_extended.txt', dir = output_dir), 'uncalled_file': OFile(name = 'data_mutations_uncalled.txt', dir = output_dir), }) - + # # # # # # # # # # # # # # # # # # # # # # - + def test_CWLDictEqual(self): """ Test case for running the fillout workflow on a number of samples, each with a bam and maf @@ -227,8 +235,8 @@ def test_CWLDictEqual(self): ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual( - self.res.output, - self.res.expected, + self.res.output, + self.res.expected, related_keys = strip_related_keys) def test_output_file_num_muts(self): @@ -236,7 +244,7 @@ def test_output_file_num_muts(self): def test_output_file_muts_hash(self): self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "89b2574c5a8ae02ac44e0bbe897a4bf3") - + def test_filtered_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 68) @@ -258,9 +266,9 @@ def test_uncalled_file_muts_hash(self): def test_portal_output_path_num_muts(self): self.assertEqualNumMutations([ - OFile.init_dict(self.res.output['portal_file']).path, - OFile.init_dict(self.res.output['uncalled_file']).path, - ], + OFile.init_dict(self.res.output['portal_file']).path, + OFile.init_dict(self.res.output['uncalled_file']).path, + ], OFile.init_dict(self.res.output['filtered_file']).path) def test_output_file_fields(self): @@ -280,9 +288,9 @@ def test_uncalled_output_path_fields(self): class TestSamplesFilloutIndexBatch2Group2(PlutoPreRunTestCase): - + cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl') - + def setUp(self): super().setUp() self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs @@ -337,13 +345,13 @@ def setUpRun(self): self.input = { "sample_groups": [sample_group1, sample_group2], "fillout_output_fname": 'output.maf', - "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']}, + "ref_fasta": {"class": "File", "path": DATA_SETS['Proj_08390_G']['REF_FASTA']}, } output_json, output_dir = self.run_cwl() return(output_json, output_dir) - + def getExpected(self, output_dir): return({ 'output_file': OFile(name = 'output.maf', dir = output_dir), @@ -351,7 +359,7 @@ def getExpected(self, output_dir): 'portal_file': OFile(name = 'data_mutations_extended.txt', dir = output_dir), 'uncalled_file': OFile(name = 'data_mutations_uncalled.txt', dir = output_dir), }) - + # # # # # # # # # # # # # # # # # # # # # # @@ -367,15 +375,15 @@ def test_CWLDictEqual(self): ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual( - self.res.output, - self.res.expected, + self.res.output, + self.res.expected, related_keys = strip_related_keys) def test_output_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['output_file']).path, 235) def test_output_file_muts_hash(self): - self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "4e4c91ef129a853a35b86f7fa6f1268a") + self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "4e4c91ef129a853a35b86f7fa6f1268a") def test_filtered_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 150) @@ -397,9 +405,9 @@ def test_uncalled_file_muts_hash(self): def test_portal_output_path_num_muts(self): self.assertEqualNumMutations([ - OFile.init_dict(self.res.output['portal_file']).path, - OFile.init_dict(self.res.output['uncalled_file']).path, - ], + OFile.init_dict(self.res.output['portal_file']).path, + OFile.init_dict(self.res.output['uncalled_file']).path, + ], OFile.init_dict(self.res.output['filtered_file']).path) def test_output_file_fields(self): @@ -419,9 +427,9 @@ def test_uncalled_output_path_fields(self): class TestSamplesFilloutIndexBatch3Group(PlutoPreRunTestCase): - + cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl') - + def setUp(self): super().setUp() self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs @@ -480,13 +488,13 @@ def setUpRun(self): self.input = { "sample_groups": [sample_group1, sample_group2, sample_group3], "fillout_output_fname": 'output.maf', - "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']}, + "ref_fasta": {"class": "File", "path": DATA_SETS['Proj_08390_G']['REF_FASTA']}, } output_json, output_dir = self.run_cwl() return(output_json, output_dir) - + def getExpected(self, output_dir): return({ 'output_file': OFile(name = 'output.maf', dir = output_dir), @@ -494,7 +502,7 @@ def getExpected(self, output_dir): 'portal_file': OFile(name = 'data_mutations_extended.txt', dir = output_dir), 'uncalled_file': OFile(name = 'data_mutations_uncalled.txt', dir = output_dir), }) - + # # # # # # # # # # # # # # # # # # # # # # @@ -510,15 +518,15 @@ def test_CWLDictEqual(self): ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual( - self.res.output, - self.res.expected, + self.res.output, + self.res.expected, related_keys = strip_related_keys) def test_output_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['output_file']).path, 144) def test_output_file_muts_hash(self): - self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "85abef967b1d43112da6a026e80f5cea") + self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "85abef967b1d43112da6a026e80f5cea") def test_filtered_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 144) @@ -540,9 +548,9 @@ def test_uncalled_file_muts_hash(self): def test_portal_output_path_num_muts(self): self.assertEqualNumMutations([ - OFile.init_dict(self.res.output['portal_file']).path, - OFile.init_dict(self.res.output['uncalled_file']).path, - ], + OFile.init_dict(self.res.output['portal_file']).path, + OFile.init_dict(self.res.output['uncalled_file']).path, + ], OFile.init_dict(self.res.output['filtered_file']).path) def test_output_file_fields(self): @@ -562,9 +570,9 @@ def test_uncalled_output_path_fields(self): class TestSamplesFilloutIndexBatch4Group(PlutoPreRunTestCase): - + cwl_file = CWLFile('samples_fillout_index_batch_workflow.cwl') - + def setUp(self): super().setUp() self.runner_args['use_cache'] = False # do not use cache for samples fillout workflow it breaks on split_vcf_to_mafs @@ -625,12 +633,12 @@ def setUpRun(self): self.input = { "sample_groups": [sample_group1, sample_group2, sample_group3, sample_group4], "fillout_output_fname": 'output.maf', - "ref_fasta": {"class": "File", "path": self.DATA_SETS['Proj_08390_G']['REF_FASTA']}, + "ref_fasta": {"class": "File", "path": DATA_SETS['Proj_08390_G']['REF_FASTA']}, } output_json, output_dir = self.run_cwl() return(output_json, output_dir) - + def getExpected(self, output_dir): return({ 'output_file': OFile(name = 'output.maf', dir = output_dir), @@ -638,7 +646,7 @@ def getExpected(self, output_dir): 'portal_file': OFile(name = 'data_mutations_extended.txt', dir = output_dir), 'uncalled_file': OFile(name = 'data_mutations_uncalled.txt', dir = output_dir), }) - + # # # # # # # # # # # # # # # # # # # # # # @@ -654,15 +662,15 @@ def test_CWLDictEqual(self): ('basename', 'data_mutations_uncalled.txt', ['size', 'checksum']) ] self.assertCWLDictEqual( - self.res.output, - self.res.expected, + self.res.output, + self.res.expected, related_keys = strip_related_keys) def test_output_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['output_file']).path, 222) def test_output_file_muts_hash(self): - self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "610d91b05a37b371a6b1b1615042dcc3") + self.assertMutationsHash(OFile.init_dict(self.res.output['output_file']).path, "610d91b05a37b371a6b1b1615042dcc3") def test_filtered_file_num_muts(self): self.assertNumMutations(OFile.init_dict(self.res.output['filtered_file']).path, 222) @@ -684,9 +692,9 @@ def test_uncalled_file_muts_hash(self): def test_portal_output_path_num_muts(self): self.assertEqualNumMutations([ - OFile.init_dict(self.res.output['portal_file']).path, - OFile.init_dict(self.res.output['uncalled_file']).path, - ], + OFile.init_dict(self.res.output['portal_file']).path, + OFile.init_dict(self.res.output['uncalled_file']).path, + ], OFile.init_dict(self.res.output['filtered_file']).path) def test_output_file_fields(self): @@ -706,5 +714,5 @@ def test_uncalled_output_path_fields(self): -if __name__ == "__main__": - unittest.main() + +