diff --git a/aidiva/helper_modules/create_result_vcf.py b/aidiva/helper_modules/create_result_vcf.py index 8567613..1b8e299 100644 --- a/aidiva/helper_modules/create_result_vcf.py +++ b/aidiva/helper_modules/create_result_vcf.py @@ -6,11 +6,11 @@ def write_header(out_file, single): out_file.write("##fileformat=VCFv4.1\n") if not single: - out_file.write("##INFO=\n") + out_file.write("##INFO=\n") out_file.write("##INFO=\n") out_file.write("##INFO=\n") else: - out_file.write("##INFO=\n") + out_file.write("##INFO=\n") out_file.write("##INFO=\n") out_file.write("##INFO=\n") @@ -43,35 +43,104 @@ def write_header(out_file, single): def write_result_vcf(input_data, vcf_file, single): input_data = input_data.sort_values(["CHROM", "POS"], ascending=[True, True]) input_data = input_data.reset_index(drop=True) + colnames = input_data.columns with open(vcf_file, "w") as out: write_header(out, single) for row in input_data.itertuples(): - if str(row.AIDIVA_SCORE) == "nan": - aidiva_score = "" + if ("AIDIVA_SCORE" in colnames): + if (str(row.AIDIVA_SCORE) == "nan"): + aidiva_score = "." + else: + aidiva_score = str(row.AIDIVA_SCORE) else: - aidiva_score = str(row.AIDIVA_SCORE) + aidiva_score = "." - if str(row.FINAL_AIDIVA_SCORE) == "nan": - final_aidiva_score = "" + if ("FINAL_AIDIVA_SCORE" in colnames): + if (str(row.FINAL_AIDIVA_SCORE) == "nan"): + final_aidiva_score = "." + else: + final_aidiva_score = str(row.FINAL_AIDIVA_SCORE) else: - final_aidiva_score = str(row.FINAL_AIDIVA_SCORE) + final_aidiva_score = "." - if str(row.HPO_RELATEDNESS) == "nan": - hpo_relatedness = "" + if ("HPO_RELATEDNESS" in colnames): + if (str(row.HPO_RELATEDNESS) == "nan"): + hpo_relatedness = "." + else: + hpo_relatedness = str(row.HPO_RELATEDNESS) else: - hpo_relatedness = str(row.HPO_RELATEDNESS) + hpo_relatedness = "." - if str(row.HPO_RELATEDNESS_INTERACTING) == "nan": - hpo_relatedness_interacting = "" + if ("HPO_RELATEDNESS_INTERACTING" in colnames): + if (str(row.HPO_RELATEDNESS_INTERACTING) == "nan"): + hpo_relatedness_interacting = "." + else: + hpo_relatedness_interacting = str(row.HPO_RELATEDNESS_INTERACTING) else: - hpo_relatedness_interacting = str(row.HPO_RELATEDNESS_INTERACTING) + hpo_relatedness_interacting = "." + + if ("FILTER_PASSED" in colnames): + if (str(row.FILTER_PASSED) == "nan"): + filter_passed = "." + else: + filter_passed = str(row.FILTER_PASSED) + else: + filter_passed = "." + + if ("DOMINANT" in colnames): + if (str(row.DOMINANT) == "nan"): + dominant = "." + else: + dominant = str(row.DOMINANT) + else: + dominant = "." + + if ("DOMINANT_DENOVO" in colnames): + if (str(row.DOMINANT_DENOVO) == "nan"): + dominant_denovo = "." + else: + dominant_denovo = str(row.DOMINANT_DENOVO) + else: + dominant_denovo = "." + + if ("RECESSIVE" in colnames): + if (str(row.RECESSIVE) == "nan"): + recessive = "." + else: + recessive = str(row.RECESSIVE) + else: + recessive = "." + + if ("XLINKED" in colnames): + if (str(row.XLINKED) == "nan"): + xlinked = "." + else: + xlinked = str(row.XLINKED) + else: + xlinked = "." + + if ("COMPOUND" in colnames): + if (str(row.COMPOUND) == "nan"): + compound = "." + else: + compound = str(row.COMPOUND) + else: + compound = "." + + if ("INHERITANCE" in colnames): + if (str(row.INHERITANCE) == "nan") or (str(row.INHERITANCE) == ""): + inheritance_comment = "." + else: + inheritance_comment = str(row.INHERITANCE) + else: + inheritance_comment = "." if not single: - info_entry = "AIDIVA=" + aidiva_score + "," + final_aidiva_score + "," + hpo_relatedness + "," + hpo_relatedness_interacting + "," + str(row.FILTER_PASSED) + ";AIDIVA_INHERITANCE=" + str(row.DOMINANT) + "," + str(row.DOMINANT_DENOVO) + "," + str(row.RECESSIVE) + "," + str(row.XLINKED) + "," + str(row.COMPOUND) + ";AIDIVA_INHERITANCE_COMMENT=" + str(row.INHERITANCE) + info_entry = "AIDIVA=" + aidiva_score + "," + final_aidiva_score + "," + hpo_relatedness + "," + hpo_relatedness_interacting + "," + filter_passed + ";AIDIVA_INHERITANCE=" + dominant + "," + dominant_denovo + "," + recessive + "," + xlinked + "," + compound + ";AIDIVA_INHERITANCE_COMMENT=" + inheritance_comment else: - info_entry = "AIDIVA=" + aidiva_score + "," + final_aidiva_score + "," + hpo_relatedness + "," + hpo_relatedness_interacting + "," + str(row.FILTER_PASSED) + ";AIDIVA_INHERITANCE=" + str(row.RECESSIVE) + "," + str(row.COMPOUND) + ";AIDIVA_INHERITANCE_COMMENT=" + str(row.INHERITANCE) + info_entry = "AIDIVA=" + aidiva_score + "," + final_aidiva_score + "," + hpo_relatedness + "," + hpo_relatedness_interacting + "," + filter_passed + ";AIDIVA_INHERITANCE=" + recessive + "," + compound + ";AIDIVA_INHERITANCE_COMMENT=" + inheritance_comment out.write(str(row.CHROM).strip() + "\t" + str(row.POS) + "\t" + "." + "\t" + str(row.REF) + "\t" + str(row.ALT) + "\t" + "." + "\t" + "." + "\t" + info_entry + "\n") diff --git a/aidiva/run_AIdiva.py b/aidiva/run_AIdiva.py index 47bd70a..38f01ad 100644 --- a/aidiva/run_AIdiva.py +++ b/aidiva/run_AIdiva.py @@ -107,7 +107,9 @@ write_result.write_result_vcf(prioritized_data, str(working_directory + output_filename + ".vcf"), bool(family_type == "SINGLE")) write_result.write_result_vcf(prioritized_data[prioritized_data["FILTER_PASSED"] == 1], str(working_directory + output_filename + "_filtered.vcf"), bool(family_type == "SINGLE")) prioritized_data.to_csv(str(working_directory + output_filename + ".csv"), sep="\t", index=False) - prioritized_data[prioritized_data["FILTER_PASSED"] == 1].to_csv(str(working_directory + output_filename + "_passed_filters.csv"), sep="\t", index=False) + prioritized_data[prioritized_data["FILTER_PASSED"] == 1].to_csv(str(working_directory + output_filename + "_filtered.csv"), sep="\t", index=False) print("Pipeline successfully finsished!") else: + write_result.write_result_vcf(input_data_snp, str(working_directory + output_filename + ".vcf"), bool(family_type == "SINGLE")) + write_result.write_result_vcf(input_data_snp, str(working_directory + output_filename + "_filtered.vcf"), bool(family_type == "SINGLE")) print("ERROR: The given input files were empty!") diff --git a/aidiva/run_annotation_and_AIdiva.py b/aidiva/run_annotation_and_AIdiva.py index 37aaead..9441a14 100644 --- a/aidiva/run_annotation_and_AIdiva.py +++ b/aidiva/run_annotation_and_AIdiva.py @@ -51,7 +51,7 @@ scoring_model_indel = os.path.dirname(os.path.abspath(__file__)) + "/../data/" + configuration["Analysis-Input"]["scoring-model-indel"] # obtain number of threads to use during computation - num_cores = configuration["VEP-Annotation"]["num-threads"] + num_cores = int(configuration["VEP-Annotation"]["num-threads"]) # parse disease and inheritance information if args.hpo_list is not None: @@ -133,11 +133,11 @@ # prioritize and filter variants print("Filter variants and finalize score...") - prioritized_data = prio.prioritize_variants(predicted_data, hpo_resources_folder, family_file, family_type, hpo_file, gene_exclusion_file, num_cores) + prioritized_data = prio.prioritize_variants(predicted_data, hpo_resources_folder, num_cores, family_file, family_type, hpo_file, gene_exclusion_file) ## TODO: create additional output files according to the inheritance information (only filtered data) write_result.write_result_vcf(prioritized_data, str(working_directory + input_filename + "_aidiva_result.vcf"), bool(family_type == "SINGLE")) write_result.write_result_vcf(prioritized_data[prioritized_data["FILTER_PASSED"] == 1], str(working_directory + input_filename + "_aidiva_result_filtered.vcf"), bool(family_type == "SINGLE")) prioritized_data.to_csv(str(working_directory + input_filename + "_aidiva_result.csv"), sep="\t", index=False) - prioritized_data[prioritized_data["FILTER_PASSED"] == 1].to_csv(str(working_directory + input_filename + "_aidiva_result_filt.csv"), sep="\t", index=False) + prioritized_data[prioritized_data["FILTER_PASSED"] == 1].to_csv(str(working_directory + input_filename + "_aidiva_result_filtered.csv"), sep="\t", index=False) print("Pipeline successfully finsished!") diff --git a/aidiva/variant_prioritization/prioritize_variants.py b/aidiva/variant_prioritization/prioritize_variants.py index 6957244..919ac9a 100644 --- a/aidiva/variant_prioritization/prioritize_variants.py +++ b/aidiva/variant_prioritization/prioritize_variants.py @@ -325,7 +325,7 @@ def add_inheritance_mode(variant, variant_columns): if variant["XLINKED"] == 1: inheritance_list.append("XLINKED") - inheritance_mode = ";".join(inheritance_list) + inheritance_mode = "&".join(inheritance_list) return inheritance_mode