From b661462ff6a26b72a5dc04ec2fb1b056d8cd80cc Mon Sep 17 00:00:00 2001 From: rklocke Date: Wed, 13 Dec 2023 15:34:50 +0000 Subject: [PATCH 1/5] Wrote numbers of variants from within excel.py write_variants() to JSON file and read this in in code.sh so that these numbers can be uploaded as file details --- requirements.txt | 2 +- .../dnanexus/generate_workbook/utils/excel.py | 35 +++++++++++++++---- src/code.sh | 5 +-- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index 82b324a2..9dd46871 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ colour==0.1.5 -openpyxl==3.0.9 +openpyxl==3.1.2 pandas==1.3.5 et-xmlfile==1.1.0 filetype==1.1.0 diff --git a/resources/home/dnanexus/generate_workbook/utils/excel.py b/resources/home/dnanexus/generate_workbook/utils/excel.py index 30d296d1..2e1ac27f 100644 --- a/resources/home/dnanexus/generate_workbook/utils/excel.py +++ b/resources/home/dnanexus/generate_workbook/utils/excel.py @@ -1,5 +1,7 @@ from collections import defaultdict +import json import operator +import os from pathlib import Path import re from string import ascii_uppercase as uppercase @@ -181,7 +183,7 @@ def helios_summary(self) -> None: self.summary.cell(9, 1).value = "Variant totals" - to_bold.extend(["A1", "A2", "A4", "A5", "A6", "A9"]) + to_bold.extend(["A1", "A2", "A4", "A5", "A6", "A9"]) # get sample name from vcf, should only be one but handle everything # list-wise just in case @@ -228,7 +230,7 @@ def helios_summary(self) -> None: # not 4 cols => didn't parse out just sample values in # utils.parse_metrics => skip continue - + # specific metrics lines we want to parse out idxs = [] idxs.append(df[0].eq('Metric (UOM)').idxmax()) @@ -249,7 +251,7 @@ def helios_summary(self) -> None: self.summary.cell(row_count, 2).value = lsl self.summary.cell(row_count, 3).value = usl self.summary.cell(row_count, 4).value = sample - + # perform colouring like in self.colour_metrics(), lazily # catch 
anything in case of weird values to not break try: @@ -278,10 +280,10 @@ def helios_summary(self) -> None: "WARNING: error in colouring metrics values in " f"summary sheet: {err}.\nContinuing without colouring" ) - + to_bold.append(f"A{row_count}") row_count += 1 - + # do the colouring for colour, idxs in colouring.items(): for idx in idxs: @@ -301,7 +303,7 @@ def helios_summary(self) -> None: start_color='b30000' ) row_count += 2 - + # Parsing of TMB/MSI/Gene Amplifications into summary for _, df in self.additional_files.items(): if df.empty: @@ -327,7 +329,7 @@ def helios_summary(self) -> None: for ref in list(set(self.refs)): self.summary.cell(row_count, 2).value = ref row_count += 1 - + row_count += 2 if self.args.human_filter: @@ -374,6 +376,7 @@ def dias_summary(self) -> None: - sample ID, panel(s), run IDs etc. - formatted tables for them to fill in reporting """ + details_dict = defaultdict() # write titles for summary values self.summary.cell(1, 1).value = "Sample ID:" self.summary.cell(1, 5).value = "Clinical Indication(s):" @@ -394,6 +397,9 @@ def dias_summary(self) -> None: self.summary.cell(1, 6).value = self.args.clinical_indication self.summary.cell(2, 6).value = self.args.panel + if self.args.clinical_indication: + details_dict['clinical_indication'] = self.args.clinical_indication + # write total rows in each sheet count = 34 @@ -406,6 +412,11 @@ def dias_summary(self) -> None: to_bold.append(f"A{count}") count += 1 + if details_dict: + # Write out clinical indication to JSON file + with open('details.json', 'w', encoding='utf8') as details_json: + json.dump(details_dict, details_json) + count += 5 # write genome reference(s) parsed from vcf header @@ -724,6 +735,11 @@ def write_variants(self) -> None: "this may take a few minutes..." 
) + if os.path.isfile('details.json'): + with open('details.json', 'r') as details_json: + details_dict = json.load(details_json) + else: + details_dict = defaultdict() with self.writer: # add variants for sheet, vcf in zip(self.args.sheets, self.vcfs): @@ -732,6 +748,7 @@ def write_variants(self) -> None: f"\nWriting {len(vcf)} rows to {sheet} sheet " f"({sheet_no}/{len(self.args.sheets)})" ) + details_dict[sheet] = len(vcf) # timing how long it takes to write because its slow start = timer() @@ -764,6 +781,10 @@ def write_variants(self) -> None: self.set_types(curr_worksheet) self.workbook.save(self.args.output) + if details_dict: + with open('details.json', 'w', encoding='utf8') as details_json: + json.dump(details_dict, details_json) + def write_additional_files(self) -> None: """ diff --git a/src/code.sh b/src/code.sh index d61d5a70..d174d3b0 100755 --- a/src/code.sh +++ b/src/code.sh @@ -4,7 +4,7 @@ set -exo pipefail _dias_report_setup () { # function to handle parsing values and reading - # manifest / g2t etc. for Dias sampels + # manifest / g2t etc. 
for Dias samples mark-section "Getting output name for Dias" project_id=$DX_PROJECT_CONTEXT_ID @@ -122,7 +122,8 @@ main() { fi mark-section "Uploading output" - output_xlsx=$(dx upload /home/dnanexus/out/xlsx_reports/* --brief) + JSON_DETAILS=$(cat details.json) + output_xlsx=$(dx upload /home/dnanexus/out/xlsx_reports/* --brief --details "$JSON_DETAILS") dx-jobutil-add-output xlsx_report "$output_xlsx" --class=file if [ "$keep_tmp" == true ]; then From 3e0df840d59bc623dd8def6db17dd6a84f48bcfe Mon Sep 17 00:00:00 2001 From: rklocke Date: Fri, 15 Dec 2023 13:58:27 +0000 Subject: [PATCH 2/5] Update writing of details.json --- .../dnanexus/generate_workbook/utils/excel.py | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/resources/home/dnanexus/generate_workbook/utils/excel.py b/resources/home/dnanexus/generate_workbook/utils/excel.py index 2e1ac27f..09bd2542 100644 --- a/resources/home/dnanexus/generate_workbook/utils/excel.py +++ b/resources/home/dnanexus/generate_workbook/utils/excel.py @@ -397,8 +397,12 @@ def dias_summary(self) -> None: self.summary.cell(1, 6).value = self.args.clinical_indication self.summary.cell(2, 6).value = self.args.panel + # If clinical indication given as arg, add this to our dict + # Write out the dias clinical indication info to JSON file if self.args.clinical_indication: details_dict['clinical_indication'] = self.args.clinical_indication + with open('details.json', 'w', encoding='utf8') as details_json: + json.dump(details_dict, details_json) # write total rows in each sheet count = 34 @@ -412,10 +416,6 @@ def dias_summary(self) -> None: to_bold.append(f"A{count}") count += 1 - if details_dict: - # Write out clinical indication to JSON file - with open('details.json', 'w', encoding='utf8') as details_json: - json.dump(details_dict, details_json) count += 5 @@ -735,11 +735,16 @@ def write_variants(self) -> None: "this may take a few minutes..." 
) + # If details.json already exists (dias summary page written and + # clinical indication is given as arg) then + # open it and read it in so we can add var counts to the dict + # otherwise just make a new empty dict if os.path.isfile('details.json'): - with open('details.json', 'r') as details_json: + with open('details.json', 'r', encoding='utf8') as details_json: details_dict = json.load(details_json) else: details_dict = defaultdict() + with self.writer: # add variants for sheet, vcf in zip(self.args.sheets, self.vcfs): @@ -781,9 +786,9 @@ def write_variants(self) -> None: self.set_types(curr_worksheet) self.workbook.save(self.args.output) - if details_dict: - with open('details.json', 'w', encoding='utf8') as details_json: - json.dump(details_dict, details_json) + # Write out dict to file + with open('details.json', 'w', encoding='utf8') as details_json: + json.dump(details_dict, details_json) def write_additional_files(self) -> None: From beb44669b9aef25e97d47fc871fd7d0eabdaf389 Mon Sep 17 00:00:00 2001 From: rklocke Date: Fri, 15 Dec 2023 13:58:51 +0000 Subject: [PATCH 3/5] Updated app version in dxapp.json --- dxapp.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dxapp.json b/dxapp.json index e24d0db0..0b7dc959 100644 --- a/dxapp.json +++ b/dxapp.json @@ -3,7 +3,7 @@ "title": "eggd_generate_variant_workbook", "summary": "Create Excel workbook from VEP annotated vcf", "dxapi": "1.0.0", - "version": "2.5.0", + "version": "2.6.0", "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files 
(tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); v2.5.0 Better parsing of CombinedVariantOutput files as additional files", "authorizedUsers": [ "org-emee_1" From 59390ea722b21e01780595407677fae68f7f841a Mon Sep 17 00:00:00 2001 From: rklocke Date: Fri, 15 Dec 2023 13:59:17 +0000 Subject: [PATCH 4/5] Updated whatsNew in dxapp.json --- dxapp.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dxapp.json b/dxapp.json index 0b7dc959..37cb389d 100644 --- a/dxapp.json +++ b/dxapp.json @@ -4,7 +4,7 @@ "summary": "Create Excel workbook from VEP annotated vcf", "dxapi": "1.0.0", "version": "2.6.0", - "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. 
cosmic, CGC, etc..); v2.5.0 Better parsing of CombinedVariantOutput files as additional files", + "whatsNew": "* v2.0.0 Rewrite of previous app to generate xlsx file from a VEP annotated VCF(s); * v2.0.1 Bug fix to correctly treat CHROM as string values; * v2.0.2 Bug fix for ACMG report template structure; * v2.0.3 Bug fixes for issues with hyperlinks, changed app name to eggd_generate_variant_workbook; * v2.1.0 Handle VCFs from GATK gCNV and Illumina TSO500, readability tweaks to variant sheets; * v2.1.1 Bug fix for typing of numeric values in hyperlinks; * v2.2.0 Added ability to pass in non VCF files (tsvs/csvs and images) to additional sheets, optional adding of links to DECIPHER with --decipher; * v2.3.0 Added conditional colouring of cells in variant sheets, new 'basic' summary sheet; * v2.4.0 Added handling for duplicate annotation in VEP fields (i.e. cosmic, CGC, etc..); * v2.5.0 Better parsing of CombinedVariantOutput files as additional files; * v2.6.0 Add variant counts as DNAnexus file details to the .xlsx workbook", "authorizedUsers": [ "org-emee_1" ], From 9cdfcf68bd07c871c4812479d8742482e552ddba Mon Sep 17 00:00:00 2001 From: rklocke Date: Fri, 15 Dec 2023 14:30:44 +0000 Subject: [PATCH 5/5] Updated README to include dx file details --- Readme.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Readme.md b/Readme.md index 10bb5ea6..c9ba4bcc 100644 --- a/Readme.md +++ b/Readme.md @@ -198,6 +198,16 @@ This is the source code for an app that runs on the DNAnexus Platform. For more information about how to run or modify it, see https://documentation.dnanexus.com/. + +## File details +The app will also add `details` metadata, in terms of variant counts, to the output xlsx report DNAnexus file. 
Example file details when `-isummary=Dias` and `-iclinical_indication=R208.1_Inherited breast cancer and ovarian cancer_P` are set and an `-ifilter` is provided with `-ikeep_filtered=True`: +``` + "clinical_indication": "R208.1_Inherited breast cancer and ovarian cancer_P", + "included": 10, + "excluded": 255 +``` +Note: if `-isummary` is not Dias, `-ikeep_filtered=False`, and `-iclinical_indication` is not provided, then the only details added to the file would be `"included": 10`. In this case, if no filtering is performed either, then only `"variants": 265` would be added as details. + #### This app was made by EMEE GLH [bcftools]: https://samtools.github.io/bcftools/bcftools.html#filter