From d766f841f7645cb7d162d01f8ae77c7095e5f670 Mon Sep 17 00:00:00 2001 From: Jethro Rainford Date: Fri, 20 Sep 2024 14:05:19 +0100 Subject: [PATCH] separate alt het variants out --- src/vcf_qc.py | 17 +++++++++++++---- tests/test_vcf_qc.py | 12 +++++++++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/vcf_qc.py b/src/vcf_qc.py index cbbb825..d63b52b 100644 --- a/src/vcf_qc.py +++ b/src/vcf_qc.py @@ -102,10 +102,12 @@ def get_het_hom_counts(vcf) -> dict: sample = samples[0] - counts = {"het": [], "hom": [], "x_het": [], "x_hom": []} + counts = {"het": [], "hom": [], "alt_het": [], "x_het": [], "x_hom": []} variant_count = autosome_count = x_variant_count = 0 + gts = [] + for record in vcf_handle.fetch(): sample_fields = record.samples[sample] @@ -113,6 +115,8 @@ def get_het_hom_counts(vcf) -> dict: f"{record.chrom}-{record.pos}-{record.ref}-{','.join(record.alts)}" ) + gts.append(sample_fields["GT"]) + missing_fields = [ x for x in ["AD", "DP", "GT"] if not x in sample_fields ] @@ -136,7 +140,7 @@ def get_het_hom_counts(vcf) -> dict: non_ref_aaf = non_ref_depth / informative_total_depth - if len(set(sample_fields["GT"])) == 1: + if sample_fields["GT"] == (1, 1): # homozygous variant if is_autosome(record.chrom): autosome_count += 1 @@ -145,8 +149,8 @@ def get_het_hom_counts(vcf) -> dict: if re.match(r"(chr)?x", record.chrom, re.IGNORECASE): x_variant_count += 1 counts["x_hom"].append(non_ref_aaf) - else: - # heterozygous variant + elif sample_fields["GT"] == (0, 1): + # standard ref alt heterozygous variant if is_autosome(record.chrom): autosome_count += 1 counts["het"].append(non_ref_aaf) @@ -154,6 +158,10 @@ def get_het_hom_counts(vcf) -> dict: if re.match(r"(chr)?x", record.chrom, re.IGNORECASE): x_variant_count += 1 counts["x_het"].append(non_ref_aaf) + else: + # heterozygous alternate variant + if is_autosome(record.chrom): + counts["alt_het"].append(non_ref_aaf) # handy print for the logs for sense checking print( @@ -209,6 +217,7 @@ def calculate_ratios(counts) -> dict: print( f"\nTotal het counts: {len(counts['het'])}\n" f"Total hom counts: {len(counts['hom'])}\n" + f"Total alt het counts: {len(counts['alt_het'])}\n" f"Total x het counts: {len(counts['x_het'])}\n" f"Total x hom counts: {len(counts['x_hom'])}\n" ) diff --git a/tests/test_vcf_qc.py b/tests/test_vcf_qc.py index b318be1..232cf47 100644 --- a/tests/test_vcf_qc.py +++ b/tests/test_vcf_qc.py @@ -143,6 +143,7 @@ def test_aaf_calculated_correctly_for_hets_and_homs(self): expected_values = { "het": [0.45454545454545453, 0.4952076677316294], "hom": [1.0, 1.0], + "alt_het": [], "x_het": [], "x_hom": [], } @@ -176,6 +177,7 @@ def test_x_chrom_variants_additionally_returned_separate(self): expected_values = { "het": [0.45454545454545453, 0.4952076677316294], "hom": [1.0, 1.0], + "alt_het": [], "x_het": [0.6, 0.8], "x_hom": [1.0, 1.0], } @@ -195,7 +197,13 @@ def test_reference_calls_are_skipped(self): os.path.join(TEST_DATA_DIR, "test_w_ref.vcf") ) - expected_values = {"het": [], "hom": [], "x_het": [], "x_hom": []} + expected_values = { + "het": [], + "hom": [], + "alt_het": [], + "x_het": [], + "x_hom": [], + } self.assertEqual(calculated_values, expected_values) @@ -221,6 +229,7 @@ def test_het_and_hom_ratios_calculated_correctly(self): counts = { "het": [0.5, 0.45, 0.55], "hom": [1.0, 1.0, 1.0, 1.0, 0.95], + "alt_het": [], "x_het": [], "x_hom": [], } @@ -240,6 +249,7 @@ def test_x_het_hom_calculated_correctly(self): counts = { "het": [0.5, 0.45, 0.55], "hom": [1.0, 1.0, 1.0, 1.0, 0.95], + "alt_het": [], "x_het": [0.45, 0.55], "x_hom": [1.0, 0.98], }