Skip to content

Commit

Permalink
remove input maf filtering on unindexed samples in the fillout workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
stevekm committed Feb 7, 2022
1 parent 852d935 commit d8a8af9
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 21 deletions.
43 changes: 24 additions & 19 deletions cwl/samples_fillout_index_workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ requirements:
SubworkflowFeatureRequirement: {}

inputs:
samples:
samples: # NOTE: in prod, these end up being the research samples
type:
type: array
items:
Expand All @@ -34,7 +34,7 @@ inputs:
secondaryFiles:
- ^.bai

unindexed_samples:
unindexed_samples: # NOTE: in prod, these end up being the clinical samples
type:
type: array
items:
Expand Down Expand Up @@ -65,6 +65,7 @@ inputs:
class: File
path: /juno/work/ci/resources/vep/cache/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz

# these are needed for the filter script
is_impact:
type: boolean
default: True
Expand Down Expand Up @@ -98,26 +99,27 @@ steps:
scatter: sample
out: [ cbio_mutation_data_file ]

run_maf_filter_unindexed:
run: maf_filter.cwl
in:
sample: unindexed_samples
maf_file:
valueFrom: ${ return inputs.sample['maf_file']; }
is_impact: is_impact
argos_version_string: argos_version_string
scatter: sample
out: [ cbio_mutation_data_file ]
# NOTE: In prod, the unindexed_samples end up being the clinical samples; we do not want to apply the maf filter to the clinical mutation input files, so this step is disabled
# run_maf_filter_unindexed:
# run: maf_filter.cwl
# in:
# sample: unindexed_samples
# maf_file:
# valueFrom: ${ return inputs.sample['maf_file']; }
# is_impact: is_impact
# argos_version_string: argos_version_string
# scatter: sample
# out: [ cbio_mutation_data_file ]

# update the `samples` entries to use the new filtered maf files; unindexed_samples are not filtered here and are merged in by a downstream step
merge_samples_replace_mafs:
in:
samples:
source: [ samples, unindexed_samples ]
linkMerge: merge_flattened
maf_files:
source: [ run_maf_filter/cbio_mutation_data_file, run_maf_filter_unindexed/cbio_mutation_data_file ]
linkMerge: merge_flattened
samples: samples
# source: [ samples, unindexed_samples ]
# linkMerge: merge_flattened
maf_files: run_maf_filter/cbio_mutation_data_file
# source: [ run_maf_filter/cbio_mutation_data_file, run_maf_filter_unindexed/cbio_mutation_data_file ]
# linkMerge: merge_flattened
out: [ samples ]
run:
class: ExpressionTool
Expand Down Expand Up @@ -164,7 +166,10 @@ steps:
in:
output_fname: fillout_output_fname
exac_filter: exac_filter
samples: merge_samples_replace_mafs/samples
# samples: merge_samples_replace_mafs/samples
samples:
source: [ merge_samples_replace_mafs/samples, unindexed_samples ]
linkMerge: merge_flattened
bam_files:
source: [ bam_files, run_indexer/bam_indexed ]
linkMerge: merge_flattened
Expand Down
4 changes: 2 additions & 2 deletions tests/test_samples_fillout_index_workflow_cwl.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_run_fillout_workflow(self):

# instead of checksum and size, count the number of mutations and take a checksum on the mutation contents
comments, mutations = self.load_mutations(output_file)
self.assertEqual(len(mutations), 117)
self.assertEqual(len(mutations), 23742)

# Need to remove these fields because they are inconsistent on the output maf file;
for mut in mutations:
Expand All @@ -99,7 +99,7 @@ def test_run_fillout_workflow(self):
mut.pop('Variant_Classification')

hash = md5_obj(mutations)
expected_hash = '01af6b281f70e6821addce80a2ec5cf8'
expected_hash = 'c96f641cb134ed99c49aed7d42a0f5af'
self.assertEqual(hash, expected_hash)


Expand Down

0 comments on commit d8a8af9

Please sign in to comment.