diff --git a/cwl/analysis-workflow_wo_facets.cwl b/cwl/analysis-workflow_wo_facets.cwl new file mode 100644 index 0000000..3d8d9e9 --- /dev/null +++ b/cwl/analysis-workflow_wo_facets.cwl @@ -0,0 +1,199 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow + +requirements: + ScatterFeatureRequirement: {} + StepInputExpressionRequirement: {} + InlineJavascriptRequirement: {} + SubworkflowFeatureRequirement: {} + +inputs: + analysis_segment_cna_filename: + type: string + doc: "(ANALYSIS_SEGMENT_CNA_FILE; .seg.cna.txt)" + analysis_sv_filename: + type: string + doc: "(ANALYSIS_SV_FILE; .svs.maf)" + analysis_gene_cna_filename: + type: string + doc: "(ANALYSIS_GENE_CNA_FILENAME; .gene.cna.txt)" + analysis_mutations_filename: + type: string + doc: "(ANALYSIS_MUTATIONS_FILENAME; .muts.maf)" + analysis_mutations_share_filename: + type: string + doc: ".muts.share.maf" + mutation_maf_files: + type: File[] + doc: "analysis_mutations_filename (ANALYSIS_MUTATIONS_FILENAME) cbio_mutation_data_filename (CBIO_MUTATION_DATA_FILENAME): (MAF_DIR)/*.muts.maf" + facets_hisens_seg_files: + type: File[] + doc: "cbio_segment_data_filename (CBIO_SEGMENT_DATA_FILENAME; _data_cna_hg19.seg) analysis_segment_cna_filename (ANALYSIS_SEGMENT_CNA_FILE; .seg.cna.txt): (FACETS_DIR)/*_hisens.seg" + facets_hisens_cncf_files: + type: File[] + doc: "cbio_cna_data_filename (CBIO_CNA_DATA_FILENAME; data_CNA.txt) analysis_gene_cna_filename (ANALYSIS_GENE_CNA_FILENAME; .gene.cna.txt): (FACETS_DIR)/*_hisens.cncf.txt" + mutation_svs_maf_files: + type: File[] + doc: "analysis_sv_filename (ANALYSIS_SV_FILE; .svs.maf): (MAF_DIR)/*.svs.pass.vep.maf" + targets_list: + type: File + argos_version_string: + type: string + doc: "the version label of Roslin / Argos used to run the project analysis (ARGOS_VERSION_STRING)" + is_impact: + default: true + type: boolean + doc: "whether or not the project is an IMPACT project; should be the value 'True' if so, otherwise any other value means 'False' (IS_IMPACT)" + 
helix_filter_version: + type: string + doc: "the version label of this helix filter repo (HELIX_FILTER_VERSION; git describe --all --long)" + IMPACT_gene_list: + type: File + doc: "TSV file with gene labels and corresponding impact assays" + +steps: + # .gene.cna.txt (analysis_gene_cna_filename) + # generate_cna_data: + # run: copy_number.cwl + # in: + # output_cna_filename: analysis_gene_cna_filename + # output_cna_ascna_filename: + # valueFrom: ${ return inputs.output_cna_filename.replace(/\.[^/.]+$/, "") + '.ascna.txt'; } + # output_cna_scna_filename: + # valueFrom: ${ return inputs.output_cna_filename.replace(/\.[^/.]+$/, "") + '.scna.txt'; } + # targets_list: targets_list + # hisens_cncfs: facets_hisens_cncf_files + # out: + # [ output_cna_file ] + + # .muts.maf (analysis_mutations_filename) + # filter each maf file + muts_maf_filter: + run: maf_filter.cwl + scatter: maf_file + in: + maf_file: mutation_maf_files + argos_version_string: argos_version_string + is_impact: is_impact + analysis_mutations_filename: analysis_mutations_filename # .muts.maf + out: [ analysis_mutations_file ] + # concat all the maf files into a single table + concat_analysis_muts_maf: + run: concat-tables.cwl + in: + input_files: muts_maf_filter/analysis_mutations_file + output_filename: analysis_mutations_filename # .muts.maf + comments: + valueFrom: ${ return true; } + out: + [ output_file ] + # Need to add a version label to the maf file as per Nick's request + add_maf_comment: + run: concat_with_comments.cwl + in: + some_file: concat_analysis_muts_maf/output_file + input_files: + valueFrom: ${ return [ inputs.some_file ]; } + comment_value: helix_filter_version + output_filename: analysis_mutations_filename # .muts.maf + out: + [ output_file ] + # add the AF allele frequency column to the maf + add_af: + run: add_af.cwl + in: + input_file: add_maf_comment/output_file + output_filename: analysis_mutations_filename + out: + [ output_file ] + # label all the mutations that are in a 
gene covered by an IMPACT assay + add_is_in_impact: + run: add_is_in_impact.cwl + in: + input_file: add_af/output_file + output_filename: analysis_mutations_filename + IMPACT_file: IMPACT_gene_list + out: + [ IMPACT_col_added_file ] + # create a version of the maf with fewer columns; shareable maf .muts.share.maf + filter_maf_cols: + run: maf_col_filter.cwl + in: + input_file: add_is_in_impact/IMPACT_col_added_file + output_filename: analysis_mutations_share_filename # .muts.share.maf + out: + [ output_file ] + + # .seg.cna.txt (analysis_segment_cna_filename) + # need to reduce the number of significant figures in the hisens_segs files + reduce_sig_figs_hisens_segs: + run: reduce_sig_figs.cwl + scatter: input_file + in: + input_file: facets_hisens_seg_files + out: + [output_file] + # concatenate all of the hisens_segs files + concat_hisens_segs: + run: concat.cwl + in: + input_files: reduce_sig_figs_hisens_segs/output_file + out: + [output_file] + # rename the output file + rename_analysis_hisens_segs: + run: cp.cwl + in: + input_file: concat_hisens_segs/output_file + output_filename: analysis_segment_cna_filename # .seg.cna.txt + out: + [output_file] + + # .svs.maf (analysis_sv_filename) + # (MAF_DIR)/*.svs.pass.vep.maf (mutation_svs_maf_files) + generate_analysis_svs_maf: + run: concat_with_comments.cwl + in: + input_files: mutation_svs_maf_files + comment_value: helix_filter_version + out: + [output_file] + rename_analysis_svs_maf: + run: cp.cwl + in: + input_file: generate_analysis_svs_maf/output_file + output_filename: analysis_sv_filename # .svs.maf + out: + [output_file] + + # create the 'analysis' directory and put some files in it + make_analysis_dir: + run: put_in_dir.cwl + in: + + + #gene_cna_file: #generate_cna_data/output_cna_file # .gene.cna.txt + + + muts_maf_file: add_is_in_impact/IMPACT_col_added_file # .muts.maf + muts_share_maf_file: filter_maf_cols/output_file + hisens_segs: rename_analysis_hisens_segs/output_file # .seg.cna.txt + svs_maf_file: 
rename_analysis_svs_maf/output_file # .svs.maf + output_directory_name: + valueFrom: ${ return "analysis"; } + files: + valueFrom: ${ return [ + /* gene_cna_file removed - generate_cna_data step is disabled in this workflow */ + inputs.muts_maf_file, + inputs.muts_share_maf_file, + inputs.hisens_segs, + inputs.svs_maf_file + ]} + out: [ directory ] + +outputs: + analysis_dir: + type: Directory + outputSource: make_analysis_dir/directory diff --git a/cwl/portal-workflow_wo_facets.cwl b/cwl/portal-workflow_wo_facets.cwl new file mode 100644 index 0000000..c076767 --- /dev/null +++ b/cwl/portal-workflow_wo_facets.cwl @@ -0,0 +1,534 @@ +#!/usr/bin/env cwl-runner + +cwlVersion: v1.0 +class: Workflow + +requirements: + ScatterFeatureRequirement: {} + StepInputExpressionRequirement: {} + InlineJavascriptRequirement: {} + SubworkflowFeatureRequirement: {} + +inputs: + project_id: + type: string + doc: "unique identifier for the project (PROJ_ID)" + project_pi: + type: string + doc: "principal investigator for the project (PROJ_PI)" + request_pi: + type: string + doc: "principal investigator who requested the project (REQUEST_PI)" + project_short_name: + type: string + doc: "a short name for the project in cBioPortal (PROJ_SHORT_NAME)" + project_name: + type: string + doc: "a formal name for the project (PROJ_NAME)" + project_description: + type: string + doc: "a description of the project (PROJ_DESC)" + cancer_type: + type: string + doc: "the type of cancer used in the project (CANCER_TYPE)" + cancer_study_identifier: + type: string + doc: "a study identifier for the project to use in cBioPortal (CANCER_STUDY_IDENTIFIER)" + argos_version_string: + type: string + doc: "the version label of Roslin / Argos used to run the project analysis (ARGOS_VERSION_STRING)" + helix_filter_version: + type: string + doc: "the version label of this helix filter repo (HELIX_FILTER_VERSION; git describe --all --long)" + is_impact: + default: true + type: boolean + doc: "whether or not the project is an IMPACT project; should be the value 'True' if so, 
otherwise any other value means 'False' (IS_IMPACT)" + # TODO: this should actually be type: string[] + extra_pi_groups: + type: [ "null", string] + default: null + doc: "a list of other groups to be associated with the project in cBioPortal (EXTRA_PI_GROUPS)" + cbio_segment_data_filename: + type: string + doc: "(CBIO_SEGMENT_DATA_FILENAME; _data_cna_hg19.seg)" + cbio_meta_cna_segments_filename: + type: string + doc: "(cbio_meta_cna_segments_filename; _meta_cna_hg19_seg.txt)" + cbio_cases_sequenced_filename: + type: string + doc: "(CBIO_CASES_SEQUENCED_FILE)" + default: cases_sequenced.txt + cbio_cases_cna_filename: + type: string + default: cases_cna.txt + doc: "(CBIO_CASES_CNA_FILE)" + cbio_cases_cnaseq_filename: + type: string + default: cases_cnaseq.txt + doc: "(CBIO_CASES_CNASEQ_FILE)" + cbio_cases_all_filename: + type: string + default: cases_all.txt + doc: "(CBIO_CASES_ALL_FILE)" + cbio_meta_mutations_filename: + type: string + default: meta_mutations_extended.txt + doc: "(CBIO_META_MUTATIONS_FILE)" + cbio_meta_fusions_filename: + type: string + default: meta_fusions.txt + doc: "(CBIO_META_FUSIONS_FILE)" + cbio_meta_cna_filename: + type: string + default: meta_CNA.txt + doc: "(CBIO_META_CNA_FILE)" + cbio_meta_study_filename: + type: string + default: meta_study.txt + doc: "(CBIO_META_STUDY_FILE)" + cbio_clinical_patient_meta_filename: + type: string + default: meta_clinical_patient.txt + doc: "(CBIO_CLINCAL_PATIENT_META_FILE)" + cbio_clinical_sample_meta_filename: + type: string + default: meta_clinical_sample.txt + doc: "(CBIO_CLINICAL_SAMPLE_META_FILE)" + cbio_clinical_sample_data_filename: + type: string + default: data_clinical_sample.txt + doc: "(CBIO_CLINICAL_SAMPLE_DATA_FILENAME)" + cbio_clinical_patient_data_filename: + type: string + default: data_clinical_patient.txt + doc: "(CBIO_CLINCIAL_PATIENT_DATA_FILENAME)" + cbio_fusion_data_filename: + type: string + default: data_fusions.txt + doc: "(CBIO_FUSION_DATA_FILENAME)" + 
cbio_mutation_data_filename: + type: string + default: data_mutations_extended.txt + doc: "(CBIO_MUTATION_DATA_FILENAME)" + cbio_cna_data_filename: + type: string + default: data_CNA.txt + doc: "(CBIO_CNA_DATA_FILENAME)" + cbio_cna_ascna_data_filename: + type: string + default: data_CNA.ascna.txt + doc: "(CBIO_CNA_ASCNA_DATA_FILE)" + cbio_cna_scna_data_filename: + type: string + default: data_CNA.scna.txt + doc: "(CBIO_CNA_SCNA_DATA_FILE)" + mutation_maf_files: + type: File[] + doc: "analysis_mutations_filename (ANALYSIS_MUTATIONS_FILENAME) cbio_mutation_data_filename (CBIO_MUTATION_DATA_FILENAME): (MAF_DIR)/*.muts.maf" + facets_hisens_seg_files: + type: File[] + doc: "cbio_segment_data_filename (CBIO_SEGMENT_DATA_FILENAME; _data_cna_hg19.seg) analysis_segment_cna_filename (ANALYSIS_SEGMENT_CNA_FILE; .seg.cna.txt): (FACETS_DIR)/*_hisens.seg" + facets_hisens_cncf_files: + type: File[] + doc: "cbio_cna_data_filename (CBIO_CNA_DATA_FILENAME; data_CNA.txt) analysis_gene_cna_filename (ANALYSIS_GENE_CNA_FILENAME; .gene.cna.txt): (FACETS_DIR)/*_hisens.cncf.txt" + mutation_svs_txt_files: + type: File[] + doc: "cbio_fusion_data_filename (CBIO_FUSION_DATA_FILENAME; data_fusions.txt): (MAF_DIR)/*.svs.pass.vep.portal.txt" + facets_suite_txt_files: + type: + - "null" + - File[] + targets_list: + type: File + known_fusions_file: + type: File + data_clinical_file: + type: File + sample_summary_file: + type: + - "null" + - File + extra_cna_files: + doc: "Extra CNA data files to be merged in with the portal CNA data" + type: + - "null" + - File[] + +steps: + # meta_clinical_sample.txt (cbio_clinical_sample_meta_filename; meta_clinical_sample_file) + generate_meta_clinical_sample: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "meta_sample" } + cancer_study_id: cancer_study_identifier + sample_data_filename: cbio_clinical_sample_data_filename # data_clinical_sample.txt + output_filename: cbio_clinical_sample_meta_filename + out: + [output_file] + + # 
data_clinical_patient.txt (cbio_clinical_patient_data_filename; data_clinical_patient_file) + generate_data_clinical_patient: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "patient" } + data_clinical_file: data_clinical_file + output_filename: cbio_clinical_patient_data_filename + out: + [output_file] + + # data_clinical_sample.txt (cbio_clinical_sample_data_filename) + generate_data_clinical_sample: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "sample" } + data_clinical_file: data_clinical_file + sample_summary_file: sample_summary_file + output_filename: cbio_clinical_sample_data_filename + project_pi: project_pi + request_pi: request_pi + facets_txt_files: facets_suite_txt_files + out: + [output_file] + + # meta_study.txt (cbio_meta_study_filename; cbio_meta_study_file) + generate_cbio_meta_study: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "study" } + output_filename: cbio_meta_study_filename + cancer_study_id: cancer_study_identifier + name: project_name + short_name: project_short_name + type_of_cancer: cancer_type + description: project_description + extra_groups: extra_pi_groups + out: + [output_file] + + # meta_clinical_patient.txt (cbio_clinical_patient_meta_filename) + generate_cbio_clinical_patient_meta: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "meta_patient" } + output_filename: cbio_clinical_patient_meta_filename + cancer_study_id: cancer_study_identifier + patient_data_filename: cbio_clinical_patient_data_filename # data_clinical_patient.txt + out: + [output_file] + + # meta_CNA.txt (cbio_meta_cna_filename) + generate_cbio_meta_cna: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "meta_cna" } + output_filename: cbio_meta_cna_filename + cancer_study_id: cancer_study_identifier + cna_data_filename: cbio_cna_data_filename # data_CNA.txt + out: + [output_file] + + # meta_fusions.txt 
(cbio_meta_fusions_filename) + generate_cbio_meta_fusions: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "meta_fusion" } + output_filename: cbio_meta_fusions_filename + cancer_study_id: cancer_study_identifier + fusion_data_filename: cbio_fusion_data_filename # data_fusions.txt + out: + [output_file] + + # meta_mutations_extended.txt (cbio_meta_mutations_filename) + generate_meta_mutations_extended: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "meta_mutations" } + output_filename: cbio_meta_mutations_filename + cancer_study_id: cancer_study_identifier + mutations_data_filename: cbio_mutation_data_filename # data_mutations_extended.txt + out: + [output_file] + + # _meta_cna_hg19_seg.txt (cbio_meta_cna_segments_filename) + generate_meta_cna_segments: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "meta_segments" } + output_filename: cbio_meta_cna_segments_filename + cancer_study_id: cancer_study_identifier + segmented_data_filename: cbio_segment_data_filename # _data_cna_hg19.seg + out: + [output_file] + + # cases_all.txt (cbio_cases_all_filename) + generate_cbio_cases_all: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "cases_all" } + output_filename: cbio_cases_all_filename + cancer_study_id: cancer_study_identifier + data_clinical_file: data_clinical_file + out: + [output_file] + + # cases_cnaseq.txt + generate_cases_cnaseq: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "cases_cnaseq" } + output_filename: cbio_cases_cnaseq_filename + cancer_study_id: cancer_study_identifier + data_clinical_file: data_clinical_file + out: + [output_file] + + # cases_cna.txt + generate_cases_cna: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "cases_cna" } + output_filename: cbio_cases_cna_filename + cancer_study_id: cancer_study_identifier + data_clinical_file: data_clinical_file + out: + 
[output_file] + + # cases_sequenced.txt (cbio_cases_sequenced_filename) + generate_cases_sequenced: + run: generate_cBioPortal_file.cwl + in: + subcommand: + valueFrom: ${ return "cases_sequenced" } + output_filename: cbio_cases_sequenced_filename + cancer_study_id: cancer_study_identifier + data_clinical_file: data_clinical_file + out: + [output_file] + + # data_CNA.txt (cbio_cna_data_filename) + # data_CNA.ascna.txt (cbio_cna_ascna_data_filename) + # data_CNA.scna.txt, (cbio_cna_scna_data_filename) + # (FACETS_DIR)/*_hisens.cncf.txt (facets_hisens_cncf_files) + # targets_list + + + + # generate_cna_data: + # run: copy_number.cwl + # in: + # output_cna_filename: cbio_cna_data_filename + # output_cna_ascna_filename: cbio_cna_ascna_data_filename + # output_cna_scna_filename: cbio_cna_scna_data_filename + # targets_list: targets_list + # hisens_cncfs: facets_hisens_cncf_files + # out: + # [ output_cna_file, output_cna_ascna_file, output_cna_scna_file ] + + + + + # replace the 'ILLOGICAL' values in the data_CNA.scna.txt file + # and output it as 'data_CNA.txt' instead + # replace_illogical_values: + # run: replace.cwl + # in: + # input_file: # generate_cna_data/output_cna_scna_file # data_CNA.scna.txt + # valueFrom: {"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/data_CNA.scna.txt", "class": "File"} + # output_filename: cbio_cna_data_filename # data_CNA.txt + # out: + # [ output_file ] + + # need to clean the header columns on some of the data_CNA.scna.txt and data_CNA.txt files + # clean_cna_headers: + # run: generate_cBioPortal_file.cwl + # in: + # subcommand: + # valueFrom: ${ return "clean_cna" } + # input_file: replace_illogical_values/output_file + # output_filename: cbio_cna_data_filename + # out: + # [output_file] + # clean_ascna_headers: + # run: generate_cBioPortal_file.cwl + # in: + # subcommand: + # valueFrom: ${ return "clean_cna" } + # input_file: # generate_cna_data/output_cna_ascna_file + # valueFrom: {"path": 
"/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/data_CNA.scna.txt", "class": "File"} + # + # + # output_filename: cbio_cna_ascna_data_filename # data_CNA.ascna.txt + # out: + # [output_file] + # # if there was extra CNA file, merge it in + # merge_cna: + # run: full-outer-join.cwl + # in: + # table1: clean_cna_headers/output_file + # table2: extra_cna_files + # join_key: + # valueFrom: ${ return "Hugo_Symbol" } + # output_filename: + # valueFrom: ${ return "data_CNA.txt" } # data_CNA_merged.txt + # out: + # [ output_file ] + + + + # data_mutations_extended.txt (cbio_mutation_data_filename) + # filter each maf file + muts_maf_filter: + run: maf_filter.cwl + scatter: maf_file + in: + maf_file: mutation_maf_files + argos_version_string: argos_version_string + is_impact: is_impact + cbio_mutation_data_filename: cbio_mutation_data_filename # data_mutations_extended.txt + out: [ cbio_mutation_data_file ] + # concat all the maf files into a single table + concat_cbio_muts_maf: + run: concat-tables.cwl + in: + input_files: muts_maf_filter/cbio_mutation_data_file + output_filename: cbio_mutation_data_filename # data_mutations_extended.txt + comments: + valueFrom: ${ return true; } + out: + [output_file] + + # _data_cna_hg19.seg (cbio_segment_data_filename) + # need to reduce the number of significant figures in the hisens_segs files + # reduce_sig_figs_hisens_segs: + # run: reduce_sig_figs.cwl + # scatter: input_file + # in: + # input_file: facets_hisens_seg_files + # out: + # [output_file] + # # concatenate all of the hisens_segs files + # concat_hisens_segs: + # run: concat.cwl + # in: + # input_files: reduce_sig_figs_hisens_segs/output_file + # out: + # [output_file] + # # rename the hisens_segs concatenated table to something that cBioPortal recognizes + # rename_cbio_hisens_segs: + # run: cp.cwl + # in: + # input_file: concat_hisens_segs/output_file + # output_filename: cbio_segment_data_filename # _data_cna_hg19.seg + # out: + # [output_file] + + # 
data_fusions.txt (cbio_fusion_data_filename) + # (mutation_svs_txt_files; (MAF_DIR)/*.svs.pass.vep.portal.txt) + # concatenate all the mutation svs files + generate_cbio_fusions_data: + run: concat.cwl + in: + input_files: mutation_svs_txt_files + out: + [output_file] + filter_cbio_fusions: + run: fusion_filter.cwl + in: + fusions_file: generate_cbio_fusions_data/output_file + output_filename: cbio_fusion_data_filename # data_fusions.txt + known_fusions_file: known_fusions_file + out: + [output_file] + + # create a case_list directory + make_case_list_dir: + run: put_in_dir.cwl + in: + cases_all: generate_cbio_cases_all/output_file + cases_cnaseq: generate_cases_cnaseq/output_file + cases_cna: generate_cases_cna/output_file + cases_sequenced: generate_cases_sequenced/output_file + output_directory_name: + valueFrom: ${ return "case_lists"; } + files: + valueFrom: ${return [ + inputs.cases_all, + inputs.cases_cnaseq, + inputs.cases_cna, + inputs.cases_sequenced + ]} + out: [ directory ] + + compile_report: + run: report.cwl + in: + mutation_file: concat_cbio_muts_maf/output_file + samples_file: generate_data_clinical_sample/output_file + patients_file: generate_data_clinical_patient/output_file + out: [ output_file ] + +outputs: + portal_meta_clinical_sample_file: + type: File + outputSource: generate_meta_clinical_sample/output_file # meta_clinical_sample.txt + portal_data_clinical_patient_file: + type: File + outputSource: generate_data_clinical_patient/output_file # data_clinical_patient.txt + portal_data_clinical_sample_file: + type: File + outputSource: generate_data_clinical_sample/output_file # data_clinical_sample.txt + portal_meta_study_file: + type: File + outputSource: generate_cbio_meta_study/output_file # meta_study.txt + portal_clinical_patient_meta_file: + type: File + outputSource: generate_cbio_clinical_patient_meta/output_file # meta_clinical_patient.txt + # portal_meta_cna_file: + # type: File + # outputSource: generate_cbio_meta_cna/output_file # 
meta_CNA.txt + portal_meta_fusions_file: + type: File + outputSource: generate_cbio_meta_fusions/output_file # meta_fusions.txt + portal_meta_mutations_extended_file: + type: File + outputSource: generate_meta_mutations_extended/output_file # meta_mutations_extended.txt + # portal_meta_cna_segments_file: + # type: File + # outputSource: generate_meta_cna_segments/output_file # _meta_cna_hg19_seg.txt + # portal_cna_data_file: + # type: File + # outputSource: clean_cna_headers/output_file # data_CNA.txt + # portal_cna_ascna_file: + # type: File + # outputSource: clean_ascna_headers/output_file # data_CNA.ascna.txt + portal_muts_file: + type: File + outputSource: concat_cbio_muts_maf/output_file # data_mutations_extended.txt + # portal_hisens_segs: + # type: File + # outputSource: rename_cbio_hisens_segs/output_file # # _data_cna_hg19.seg + portal_fusions_data_file: + type: File + outputSource: filter_cbio_fusions/output_file # data_fusions.txt + portal_case_list_dir: + type: Directory + outputSource: make_case_list_dir/directory + # merged_cna_file: + # type: File + # outputSource: merge_cna/output_file # data_CNA_merged.txt + portal_report: + type: File + outputSource: compile_report/output_file diff --git a/cwl/workflow.cwl b/cwl/workflow.cwl index 589c67b..d03b7b0 100644 --- a/cwl/workflow.cwl +++ b/cwl/workflow.cwl @@ -22,7 +22,10 @@ argos_version_string helix_filter_version is_impact extra_pi_groups - +pairs +normal_bam_files +tumor_bam_files +assay_coverage The following filenames are required: @@ -32,6 +35,7 @@ analysis_sv_filename analysis_segment_cna_filename cbio_segment_data_filename cbio_meta_cna_segments_filename +microsatellites_file The following filenames have default values and are optional: @@ -220,15 +224,28 @@ inputs: type: string default: data_CNA.scna.txt doc: "(CBIO_CNA_SCNA_DATA_FILE)" - mutation_maf_files: - type: File[] - doc: "analysis_mutations_filename (ANALYSIS_MUTATIONS_FILENAME) cbio_mutation_data_filename 
(CBIO_MUTATION_DATA_FILENAME): (MAF_DIR)/*.muts.maf" + # mutation_maf_files: + # type: File[] + # doc: "analysis_mutations_filename (ANALYSIS_MUTATIONS_FILENAME) cbio_mutation_data_filename (CBIO_MUTATION_DATA_FILENAME): (MAF_DIR)/*.muts.maf" + facets_hisens_seg_files: - type: File[] + type: + - "null" + - File[] + - string doc: "cbio_segment_data_filename (CBIO_SEGMENT_DATA_FILENAME; _data_cna_hg19.seg) analysis_segment_cna_filename (ANALYSIS_SEGMENT_CNA_FILE; .seg.cna.txt): (FACETS_DIR)/*_hisens.seg" + default: [ {"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/hisens.seg", "class": "File"} ] + # valueFrom: ${ return {"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/hisens.seg", "class": "File"}; } + facets_hisens_cncf_files: - type: File[] + type: + - "null" + - File[] doc: "cbio_cna_data_filename (CBIO_CNA_DATA_FILENAME; data_CNA.txt) analysis_gene_cna_filename (ANALYSIS_GENE_CNA_FILENAME; .gene.cna.txt): (FACETS_DIR)/*_hisens.cncf.txt" + default: [ {"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/data_CNA.txt", "class": "File"} ] + # valueFrom: ${ return [{"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/data_CNA.txt", "class": "File"}]; } + + mutation_svs_txt_files: type: File[] doc: "cbio_fusion_data_filename (CBIO_FUSION_DATA_FILENAME; data_fusions.txt): (MAF_DIR)/*.svs.pass.vep.portal.txt" @@ -255,22 +272,67 @@ inputs: type: - "null" - File + pairs: + type: + type: array + items: + - type: record + fields: + pair_maf: File + snp_pileup: File + pair_id: string + tumor_id: string + normal_id: string IMPACT_gene_list: type: File doc: "TSV file with gene labels and corresponding impact assays" + assay_coverage: + type: string + doc: "genome_coverage value; amount of the genome in bp covered by the assay" + + microsatellites_file: + type: File + doc: "Microsatellites list file to use with MSI Sensor" + + normal_bam_files: + type: + type: array + items: File + doc: "Array of normal 
bam files. Must match the same order of sample pairs in 'pairs' input field" + secondaryFiles: + - ^.bai + tumor_bam_files: + type: + type: array + items: File + doc: "Array of tumor bam files. Must match the same order of sample pairs in 'pairs' input field" + secondaryFiles: + - ^.bai + + + + steps: run_analysis_workflow: - run: analysis-workflow.cwl + run: analysis-workflow_wo_facets.cwl in: analysis_segment_cna_filename: analysis_segment_cna_filename analysis_sv_filename: analysis_sv_filename analysis_gene_cna_filename: analysis_gene_cna_filename analysis_mutations_filename: analysis_mutations_filename analysis_mutations_share_filename: analysis_mutations_share_filename - mutation_maf_files: mutation_maf_files + pair: pairs + mutation_maf_files: + valueFrom: ${ return inputs.pair.map(function(p) { return p.pair_maf; }); } + + facets_hisens_seg_files: facets_hisens_seg_files + # valueFrom: ${ return [{"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/hisens.seg", "class": "File"}]; } facets_hisens_cncf_files: facets_hisens_cncf_files + # valueFrom: ${ return ["/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/data_CNA.txt"]; } + + mutation_svs_maf_files: mutation_svs_maf_files targets_list: targets_list argos_version_string: argos_version_string @@ -281,7 +343,7 @@ steps: [ analysis_dir ] run_portal_workflow: - run: portal-workflow.cwl + run: portal-workflow_wo_facets.cwl in: project_id: project_id project_pi: project_pi @@ -314,9 +376,18 @@ steps: cbio_cna_data_filename: cbio_cna_data_filename cbio_cna_ascna_data_filename: cbio_cna_ascna_data_filename cbio_cna_scna_data_filename: cbio_cna_scna_data_filename - mutation_maf_files: mutation_maf_files + + pair: pairs + mutation_maf_files: + valueFrom: ${ return inputs.pair.map(function(p) { return p.pair_maf; }); } + + facets_hisens_seg_files: facets_hisens_seg_files + # valueFrom: ${ return [ {"path": "/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/hisens.seg", "class": "File"} ]; } facets_hisens_cncf_files: 
facets_hisens_cncf_files + # valueFrom: ${ return ["/work/ci/vurals/helix_manual_reruns/test_wo_facets_workflow/data_CNA.txt"] } + + mutation_svs_txt_files: mutation_svs_txt_files targets_list: targets_list known_fusions_file: known_fusions_file @@ -331,37 +402,86 @@ steps: portal_data_clinical_sample_file, # data_clinical_sample.txt portal_meta_study_file, # meta_study.txt portal_clinical_patient_meta_file, # meta_clinical_patient.txt - portal_meta_cna_file, # meta_CNA.txt + # portal_meta_cna_file, # meta_CNA.txt portal_meta_fusions_file, # meta_fusions.txt portal_meta_mutations_extended_file, # meta_mutations_extended.txt - portal_meta_cna_segments_file, # _meta_cna_hg19_seg.txt - portal_cna_data_file, # data_CNA.txt - portal_cna_ascna_file, # data_CNA.ascna.txt + # portal_meta_cna_segments_file, # _meta_cna_hg19_seg.txt + # portal_cna_data_file, # data_CNA.txt + # portal_cna_ascna_file, # data_CNA.ascna.txt portal_muts_file, # data_mutations_extended.txt - portal_hisens_segs, # _data_cna_hg19.seg + # portal_hisens_segs, # _data_cna_hg19.seg portal_fusions_data_file, # data_fusions.txt - portal_case_list_dir + portal_case_list_dir, + # merged_cna_file, # data_CNA_merged.txt -> data_CNA.txt + portal_report ] + + + # run the TMB workflow + run_tmb_workflow: + run: tmb_workflow.cwl + in: + data_clinical_file: run_portal_workflow/portal_data_clinical_sample_file + assay_coverage: assay_coverage + pairs: pairs + out: + [ output_file ] # updated data_clinical_sample_file with the new TMB data + + # run the MSI workflow + run_msi_workflow: + run: msi_workflow.cwl + in: + data_clinical_file: run_portal_workflow/portal_data_clinical_sample_file # run_tmb_workflow/output_file # data_clinical_sample.txt + microsatellites_file: microsatellites_file + pairs: pairs + normal_bam_files: normal_bam_files + tumor_bam_files: tumor_bam_files + out: + [ output_file ] # updated data_clinical_file with MSI scores + + # combine the TMB, MSI results with the data clinical file + 
merge_data_clinical: + run: merge-tables.cwl + in: + table1: run_tmb_workflow/output_file + table2: run_msi_workflow/output_file + key1: + valueFrom: ${ return "SAMPLE_ID"; } # sample column header from data clinical file + key2: + valueFrom: ${ return "SAMPLE_ID"; } # sample column header from MSI file + output_filename: + valueFrom: ${ return "data_clinical_sample.txt"; } # TODO: should this be passed in? + cBioPortal: + valueFrom: ${ return true; } + out: + [ output_file ] + + + + + + # create the "portal" directory in the output dir and put cBioPortal files in it make_portal_dir: run: put_in_dir.cwl in: portal_meta_clinical_sample_file: run_portal_workflow/portal_meta_clinical_sample_file # meta_clinical_sample.txt portal_data_clinical_patient_file: run_portal_workflow/portal_data_clinical_patient_file # data_clinical_patient.txt - portal_data_clinical_sample_file: run_portal_workflow/portal_data_clinical_sample_file # data_clinical_sample.txt + portal_data_clinical_sample_file: merge_data_clinical/output_file # data_clinical_sample.txt portal_meta_study_file: run_portal_workflow/portal_meta_study_file # meta_study.txt portal_clinical_patient_meta_file: run_portal_workflow/portal_clinical_patient_meta_file # meta_clinical_patient.txt - portal_meta_cna_file: run_portal_workflow/portal_meta_cna_file # meta_CNA.txt + # portal_meta_cna_file: run_portal_workflow/portal_meta_cna_file # meta_CNA.txt portal_meta_fusions_file: run_portal_workflow/portal_meta_fusions_file # meta_fusions.txt portal_meta_mutations_extended_file: run_portal_workflow/portal_meta_mutations_extended_file # meta_mutations_extended.txt - portal_meta_cna_segments_file: run_portal_workflow/portal_meta_cna_segments_file # _meta_cna_hg19_seg.txt - portal_cna_data_file: run_portal_workflow/portal_cna_data_file # data_CNA.txt - portal_cna_ascna_file: run_portal_workflow/portal_cna_ascna_file # data_CNA.ascna.txt + # portal_meta_cna_segments_file: run_portal_workflow/portal_meta_cna_segments_file # 
_meta_cna_hg19_seg.txt + # portal_cna_data_file: run_portal_workflow/merged_cna_file # data_CNA.txt + # portal_cna_ascna_file: run_portal_workflow/portal_cna_ascna_file # data_CNA.ascna.txt portal_muts_file: run_portal_workflow/portal_muts_file # data_mutations_extended.txt - portal_hisens_segs: run_portal_workflow/portal_hisens_segs # # _data_cna_hg19.seg + # portal_hisens_segs: run_portal_workflow/portal_hisens_segs # # _data_cna_hg19.seg portal_fusions_data_file: run_portal_workflow/portal_fusions_data_file # data_fusions.txt portal_case_list_dir: run_portal_workflow/portal_case_list_dir + portal_report: run_portal_workflow/portal_report output_directory_name: valueFrom: ${ return "portal"; } files: