Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
stevekm committed Feb 7, 2022
2 parents 7eb2b0a + 342e6f1 commit 852d935
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 108 deletions.
114 changes: 83 additions & 31 deletions cwl/samples_fillout_index_workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -18,38 +18,36 @@ requirements:
SubworkflowFeatureRequirement: {}

inputs:
sample_ids:
samples:
type:
type: array
items: string

type: array
items:
type: record
fields:
maf_file: File
sample_id: string # must match sample ID used inside maf file
normal_id: string
bam_files:
type:
type: array
items: File
secondaryFiles:
- ^.bai

maf_files:
unindexed_samples:
type:
type: array
items: File
items:
type: record
fields:
maf_file: File
sample_id: string # must match sample ID used inside maf file
normal_id: string

unindexed_bam_files:
type:
type: array
items: File

unindexed_sample_ids:
type:
type: array
items: string

unindexed_maf_files:
type:
type: array
items: File

ref_fasta:
type: File
secondaryFiles:
Expand All @@ -60,8 +58,8 @@ inputs:
- .sa
- .fai
- ^.dict

exac_filter: # need this to resolve error in subworkflow: Anonymous file object must have 'contents' and 'basename' fields.
# TODO: this needs the .tbi/.csi index file added!!
type: File
default:
class: File
Expand All @@ -85,41 +83,95 @@ steps:
run: index_bam.cwl
in:
bam: unindexed_bam_files
scatter: [ bam ]
scatterMethod: dotproduct
scatter: bam
out: [ bam_indexed ]

# run filter script to apply cBioPortal filters on variants for fillout
run_maf_filter:
run: maf_filter.cwl
in:
maf_file: maf_files
sample: samples
maf_file:
valueFrom: ${ return inputs.sample['maf_file']; }
is_impact: is_impact
argos_version_string: argos_version_string
scatter: [ maf_file ]
scatterMethod: dotproduct
scatter: sample
out: [ cbio_mutation_data_file ]

# run the actual fillout workflow
run_maf_filter_unindexed:
run: maf_filter.cwl
in:
sample: unindexed_samples
maf_file:
valueFrom: ${ return inputs.sample['maf_file']; }
is_impact: is_impact
argos_version_string: argos_version_string
scatter: sample
out: [ cbio_mutation_data_file ]

# update the samples to use the new filtered maf files and output a single list of samples
# Both `samples` and `maf_files` are built with `linkMerge: merge_flattened`, listing the indexed source first
# and the unindexed source second. The expression pairs the two lists by array position, so the source order
# of the two inputs below must be kept in sync.
merge_samples_replace_mafs:
in:
samples:
source: [ samples, unindexed_samples ]
linkMerge: merge_flattened
maf_files:
source: [ run_maf_filter/cbio_mutation_data_file, run_maf_filter_unindexed/cbio_mutation_data_file ]
linkMerge: merge_flattened
out: [ samples ]
run:
class: ExpressionTool
inputs:
samples:
type:
type: array
items:
type: record
fields:
maf_file: File
sample_id: string
normal_id: string
# filtered maf files; entry i replaces the maf_file of samples[i]
maf_files: File[]
outputs:
samples:
type:
type: array
items:
type: record
fields:
maf_file: File
sample_id: string
normal_id: string
# The expression copies `sample_id` and `normal_id` from each input sample unchanged and sets `maf_file`
# to the entry of `inputs.maf_files` at the same position.
# NOTE: in the line below `var i in inputs.samples`, `i` is the index position in the array `inputs.samples`
# (JavaScript `for...in` yields it as a string key such as "0", "1", which still works for indexing)
# in Python it would look like ` x = ['a', 'b']; for i in range(len(x)): print(i, x[i]) `
expression: "${
var new_samples = [];
for ( var i in inputs.samples ){
new_samples.push({
'sample_id': inputs.samples[i]['sample_id'],
'normal_id': inputs.samples[i]['normal_id'],
'maf_file': inputs.maf_files[i]
});
};
return {'samples': new_samples};
}"

# run the fillout workflow
run_samples_fillout:
run: samples_fillout_workflow.cwl
in:
output_fname: fillout_output_fname
exac_filter: exac_filter
sample_ids:
source: [ sample_ids, unindexed_sample_ids ]
linkMerge: merge_flattened
samples: merge_samples_replace_mafs/samples
bam_files:
source: [ bam_files, run_indexer/bam_indexed ]
linkMerge: merge_flattened
maf_files:
source: [ run_maf_filter/cbio_mutation_data_file, unindexed_maf_files ]
linkMerge: merge_flattened
ref_fasta: ref_fasta
out: [ output_file ]

outputs:

output_file:
type: File
outputSource: run_samples_fillout/output_file
76 changes: 57 additions & 19 deletions cwl/samples_fillout_workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,24 @@ requirements:

inputs:
# NOTE: the `samples` and `bam_files` arrays must be the same length and in the same order by sample (each sample record carries its own maf_file)

samples:
type:
type: array
items:
type: record
fields:
maf_file: File
sample_id: string # must match sample ID used inside maf file
normal_id: string
output_fname:
type: [ 'null', string ]
default: "output.maf"
sample_ids:
type:
type: array
items: string
bam_files:
type:
type: array
items: File
secondaryFiles:
- ^.bai
maf_files:
type:
type: array
items: File
ref_fasta:
type: File
secondaryFiles:
Expand All @@ -54,14 +54,44 @@ inputs:
# path: /juno/work/ci/resources/vep/cache

steps:
# create a list of just sample_ids out of the samples record array
# The output `sample_ids` keeps the same order as the input `samples` array.
create_samples_list:
in:
samples: samples
out: [ sample_ids ]
run:
class: ExpressionTool
inputs:
# only the `sample_id` field is declared here because it is the only field the expression reads
samples:
type:
type: array
items:
type: record
fields:
sample_id: string
outputs:
sample_ids: string[]
# NOTE: in the line below `var i in inputs.samples`, `i` is the index position in the array `inputs.samples`
# (JavaScript `for...in` yields it as a string key such as "0", "1", which still works for indexing)
# in Python it would look like ` x = ['a', 'b']; for i in range(len(x)): print(i, x[i]) `
expression: "${
var sample_ids = [];
for ( var i in inputs.samples ){
sample_ids.push(inputs.samples[i]['sample_id']);
};
return {'sample_ids': sample_ids};
}"


# convert all maf input files back to vcf because they are much easier to manipulate that way
# NOTE: This is important; do NOT try to do these complex manipulations on maf format files
maf2vcf:
scatter: [ sample_id, maf_file ]
scatterMethod: dotproduct
scatter: sample
in:
sample_id: sample_ids
maf_file: maf_files
sample: samples
sample_id:
valueFrom: ${ return inputs.sample['sample_id']; }
maf_file:
valueFrom: ${ return inputs.sample['maf_file']; }
ref_fasta: ref_fasta
out:
[ output_file ]
Expand Down Expand Up @@ -116,7 +146,7 @@ steps:
# this will be used as the target regions for fillout
merge_vcfs:
in:
sample_ids: sample_ids
sample_ids: create_samples_list/sample_ids
vcf_gz_files: maf2vcf/output_file
out:
[ merged_vcf, merged_vcf_gz ]
Expand Down Expand Up @@ -163,9 +193,11 @@ steps:
- .tbi

# run GetBaseCountsMultiSample on all the bam files against the target regions (the merged vcf from all samples)
# TODO: convert this to a `scatter` step that runs per-sample in parallel, then merge the outputs;
# otherwise we will hit command-line argument length limits
gbcms:
in:
sample_ids: sample_ids
sample_ids: create_samples_list/sample_ids
bam_files: bam_files
targets_vcf: merge_vcfs/merged_vcf
ref_fasta: ref_fasta
Expand Down Expand Up @@ -227,7 +259,6 @@ steps:
# also we are going to add a column called SRC telling the source (which sample) each variant was originally found in
fix_labels_and_merge_vcfs:
in:
sample_ids: sample_ids
fillout_vcf: gbcms/output_file
merged_vcf: merge_vcfs/merged_vcf
merged_vcf_gz: merge_vcfs/merged_vcf_gz
Expand Down Expand Up @@ -316,9 +347,13 @@ steps:

# next we need to split apart the merged fillout vcf back into individual sample maf files
split_vcf_to_mafs:
scatter: [ sample_id ]
scatter: sample
in:
sample_id: sample_ids
sample: samples
sample_id:
valueFrom: ${ return inputs.sample['sample_id']; }
normal_id:
valueFrom: ${ return inputs.sample['normal_id']; }
fillout_vcf: fix_labels_and_merge_vcfs/fillout_sources_vcf
ref_fasta: ref_fasta
exac_filter: exac_filter
Expand All @@ -338,6 +373,7 @@ steps:
set -eu
# convert the multi-sample annotated fillout vcf back into individual sample maf files
sample_id="${ return inputs.sample_id ; }"
normal_id="${ return inputs.normal_id ; }"
ref_fasta="${ return inputs.ref_fasta.path ; }"
input_vcf="${ return inputs.fillout_vcf.path ; }"
exac_filter="${ return inputs.exac_filter.path ; }"
Expand All @@ -364,9 +400,11 @@ steps:
--retain-fmt GT,FL_AD,FL_ADN,FL_ADP,FL_DP,FL_DPN,FL_DPP,FL_RD,FL_RDN,FL_RDP,FL_VF,AD,DP \\
--vep-forks 8 \\
--vcf-tumor-id "\${sample_id}" \\
--tumor-id "\${sample_id}"
--tumor-id "\${sample_id}" \\
--normal-id "\${normal_id}"
inputs:
sample_id: string
normal_id: string
ref_fasta:
type: File
secondaryFiles:
Expand Down
2 changes: 1 addition & 1 deletion pluto
Submodule pluto updated 2 files
+1 −0 settings.py
+4 −2 tools.py
Loading

0 comments on commit 852d935

Please sign in to comment.