diff --git a/src/vcf_qc.py b/src/vcf_qc.py index 4c168cd..ad54ed8 100644 --- a/src/vcf_qc.py +++ b/src/vcf_qc.py @@ -111,6 +111,8 @@ def get_het_hom_counts(vcf) -> dict: 'x_hom': [] } + variants = autosomes = x_variants = 0 + for record in vcf_handle.fetch(): sample_fields = record.samples[sample] @@ -122,6 +124,8 @@ def get_het_hom_counts(vcf) -> dict: if sample_fields['GT'] == (0, 0): continue + variants += 1 + # using the sum of all allele depths instead of the format AD # field to be the informative read depths supporting each allele informative_total_depth = sum(sample_fields['AD']) @@ -132,16 +136,20 @@ def get_het_hom_counts(vcf) -> dict: if len(set(sample_fields['GT'])) == 1: # homozygous variant if is_autosome(record.chrom): + autosomes += 1 counts['hom'].append(non_ref_aaf) if re.match(r'(chr)?x', record.chrom, re.IGNORECASE): + x_variants += 1 counts['x_hom'].append(non_ref_aaf) else: # heterozygous variant if is_autosome(record.chrom): + autosomes += 1 counts['het'].append(non_ref_aaf) if re.match(r'(chr)?x', record.chrom, re.IGNORECASE): + x_variants += 1 counts['x_het'].append(non_ref_aaf) # handy print for the logs for sense checking @@ -152,6 +160,11 @@ def get_het_hom_counts(vcf) -> dict: f"\tAAF: {non_ref_aaf}" ) + print( + f"\nTotal variants: {variants}\nTotal autosome variants: {autosomes}\n" + f"Total X variants: {x_variants}\n" + ) + return {k: sorted(v) for k, v in counts.items()} @@ -199,7 +212,7 @@ def calculate_ratios(counts) -> dict: ) for field, value in ratios.items(): - print(f"{field}\t{value}") + print(f"{field}:\t{value}") return ratios @@ -254,7 +267,7 @@ def upload_output_file(outfile) -> None: outfile : str name of file to upload """ - print(f"Uploading {outfile}") + print(f"\nUploading {outfile}") url_file = dxpy.upload_local_file( filename=outfile,