Skip to content

Commit

Permalink
separate alt het variants out
Browse files Browse the repository at this point in the history
  • Loading branch information
jethror1 committed Sep 20, 2024
1 parent 5db01f1 commit d766f84
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 5 deletions.
17 changes: 13 additions & 4 deletions src/vcf_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,21 @@ def get_het_hom_counts(vcf) -> dict:

sample = samples[0]

counts = {"het": [], "hom": [], "x_het": [], "x_hom": []}
counts = {"het": [], "hom": [], "alt_het": [], "x_het": [], "x_hom": []}

variant_count = autosome_count = x_variant_count = 0

gts = []

for record in vcf_handle.fetch():
sample_fields = record.samples[sample]

printable_var = (
f"{record.chrom}-{record.pos}-{record.ref}-{','.join(record.alts)}"
)

gts.append(sample_fields["GT"])

missing_fields = [
x for x in ["AD", "DP", "GT"] if not x in sample_fields
]
Expand All @@ -136,7 +140,7 @@ def get_het_hom_counts(vcf) -> dict:

non_ref_aaf = non_ref_depth / informative_total_depth

if len(set(sample_fields["GT"])) == 1:
if sample_fields["GT"] == (1, 1):
# homozygous variant
if is_autosome(record.chrom):
autosome_count += 1
Expand All @@ -145,15 +149,19 @@ def get_het_hom_counts(vcf) -> dict:
if re.match(r"(chr)?x", record.chrom, re.IGNORECASE):
x_variant_count += 1
counts["x_hom"].append(non_ref_aaf)
else:
# heterozygous variant
elif sample_fields["GT"] == (0, 1):
# standard ref alt heterozygous variant
if is_autosome(record.chrom):
autosome_count += 1
counts["het"].append(non_ref_aaf)

if re.match(r"(chr)?x", record.chrom, re.IGNORECASE):
x_variant_count += 1
counts["x_het"].append(non_ref_aaf)
else:
# heterozygous alternate variant
if is_autosome(record.chrom):
counts["alt_het"].append(non_ref_aaf)

# handy print for the logs for sense checking
print(
Expand Down Expand Up @@ -209,6 +217,7 @@ def calculate_ratios(counts) -> dict:
print(
f"\nTotal het counts: {len(counts['het'])}\n"
f"Total hom counts: {len(counts['hom'])}\n"
f"Total alt het counts: {len(counts['alt_het'])}\n"
f"Total x het counts: {len(counts['x_het'])}\n"
f"Total x hom counts: {len(counts['x_hom'])}\n"
)
Expand Down
12 changes: 11 additions & 1 deletion tests/test_vcf_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def test_aaf_calculated_correctly_for_hets_and_homs(self):
expected_values = {
"het": [0.45454545454545453, 0.4952076677316294],
"hom": [1.0, 1.0],
"alt_het": [],
"x_het": [],
"x_hom": [],
}
Expand Down Expand Up @@ -176,6 +177,7 @@ def test_x_chrom_variants_additionally_returned_separate(self):
expected_values = {
"het": [0.45454545454545453, 0.4952076677316294],
"hom": [1.0, 1.0],
"alt_het": [],
"x_het": [0.6, 0.8],
"x_hom": [1.0, 1.0],
}
Expand All @@ -195,7 +197,13 @@ def test_reference_calls_are_skipped(self):
os.path.join(TEST_DATA_DIR, "test_w_ref.vcf")
)

expected_values = {"het": [], "hom": [], "x_het": [], "x_hom": []}
expected_values = {
"het": [],
"hom": [],
"alt_het": [],
"x_het": [],
"x_hom": [],
}

self.assertEqual(calculated_values, expected_values)

Expand All @@ -221,6 +229,7 @@ def test_het_and_hom_ratios_calculated_correctly(self):
counts = {
"het": [0.5, 0.45, 0.55],
"hom": [1.0, 1.0, 1.0, 1.0, 0.95],
"alt_het": [],
"x_het": [],
"x_hom": [],
}
Expand All @@ -240,6 +249,7 @@ def test_x_het_hom_calculated_correctly(self):
counts = {
"het": [0.5, 0.45, 0.55],
"hom": [1.0, 1.0, 1.0, 1.0, 0.95],
"alt_het": [],
"x_het": [0.45, 0.55],
"x_hom": [1.0, 0.98],
}
Expand Down

0 comments on commit d766f84

Please sign in to comment.