-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#480 - annotation and transcript version pages
- Loading branch information
Showing
9 changed files
with
289 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
42 changes: 42 additions & 0 deletions
42
genes/management/commands/fix_retrieve_transcript_version_sequence_info.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import logging | ||
|
||
from django.core.management import BaseCommand | ||
|
||
from genes.models import TranscriptVersion, GeneAnnotationImport, TranscriptVersionSequenceInfo | ||
from snpdb.models import GenomeBuild | ||
|
||
|
||
class Command(BaseCommand):
    """Retrieve TranscriptVersionSequenceInfo records for transcript versions that still lack one."""

    def handle(self, *args, **options):
        """Work out which transcript accessions need sequence info and retrieve them.

        An accession is selected when any of the following holds, and it does not
        already have a TranscriptVersionSequenceInfo record:

        * its length differs between the GRCh37 and GRCh38 transcript versions,
        * it is present in only one of the two builds,
        * it came from an import whose filename contains 'genePred'.
        """
        # Accessions ("<transcript_id>.<version>") that already have sequence info
        already_stored = {
            f"{info.transcript_id}.{info.version}"
            for info in TranscriptVersionSequenceInfo.objects.all()
        }

        # Same filters for both builds; only the genome build differs
        # (consortium 'R' is presumably RefSeq, given the retrieval call below)
        shared_filters = {
            "transcript__annotation_consortium": 'R',
            "alignment_gap": False,
        }
        grch37_versions = TranscriptVersion.objects.filter(genome_build=GenomeBuild.grch37(),
                                                           **shared_filters)
        lengths_grch37 = {tv.accession: tv.length for tv in grch37_versions}

        grch38_versions = TranscriptVersion.objects.filter(genome_build=GenomeBuild.grch38(),
                                                           **shared_filters)
        lengths_grch38 = {tv.accession: tv.length for tv in grch38_versions}

        # Accessions in both builds whose lengths disagree
        in_both_builds = lengths_grch37.keys() & lengths_grch38.keys()
        length_mismatch = {
            accession
            for accession in in_both_builds
            if lengths_grch37[accession] != lengths_grch38[accession]
        }

        # Accessions found in exactly one of the two builds
        single_build_only = lengths_grch37.keys() ^ lengths_grch38.keys()

        # Imports w/o GFFs (only from genePred) don't have alignment info (gap_count),
        # so alignment gaps can't be detected for their transcript versions
        genepred_imports = GeneAnnotationImport.objects.filter(filename__contains='genePred')
        genepred_accessions = {
            tv.accession
            for tv in TranscriptVersion.objects.filter(import_source__in=genepred_imports)
        }

        candidates = length_mismatch.union(single_build_only, genepred_accessions)
        need_to_retrieve = candidates.difference(already_stored)
        logging.info("Retrieving %d Transcript Version Sequence Info records (takes ~1min per 1000)",
                     len(need_to_retrieve))
        TranscriptVersionSequenceInfo.get_refseq_transcript_versions(need_to_retrieve)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 26 additions & 0 deletions
26
genes/migrations/0040_one_off_transcript_version_sequence_info_data.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
# Generated by Django 3.2.6 on 2021-09-14 12:08 | ||
|
||
from django.db import migrations | ||
|
||
from manual.operations.manual_operations import ManualOperation | ||
|
||
|
||
def _test_has_transcript_versions(apps):
    """Return whether any TranscriptVersion rows exist.

    Used as the ``test`` for the manual operations in this migration: new
    deployments have no transcript versions, so they don't need to run them.

    ``apps`` is the app registry handed to the migration; the model is looked
    up through it as ("genes", "TranscriptVersion").
    """
    return apps.get_model("genes", "TranscriptVersion").objects.exists()
|
||
class Migration(migrations.Migration):
    """One-off data migration that registers manual operations for transcript version sequence info.

    Both operations are given ``_test_has_transcript_versions`` as their test,
    so (presumably) they only apply to deployments that already hold
    transcript versions.
    """

    dependencies = [
        (
            'genes',
            '0039_transcriptversionsequenceinfo_transcriptversionsequenceinfofastafileimport',
        ),
    ]

    operations = [
        # Reminder of the step to perform before running the fix command
        ManualOperation.operation_other(
            args=[
                "*** BEFORE fix_retrieve_transcript_version_sequence_info - import_refseq_transcript_fasta - see annotation page"
            ],
            test=_test_has_transcript_versions,
        ),
        # The management command that retrieves the missing sequence info
        ManualOperation(
            task_id=ManualOperation.task_id_manage(
                ["fix_retrieve_transcript_version_sequence_info"]
            ),
            test=_test_has_transcript_versions,
        ),
    ]
Oops, something went wrong.