Skip to content

Commit

Permalink
refactor!: rearrange app architecture (#196)
Browse files Browse the repository at this point in the history
* Remove gene-normalizer dependency
  * Removed `get_mapped_mane_data` (#194), which was the only reason we needed gene-normalizer
* Rearrange app architecture
  * handlers
    * `SeqRepoAccess`
      * `get_fasta_file` is now a method
  * mappers
    * `AlignmentMapper`
    * `MANETranscript`
    * `ExonGenomicCoordsMapper`
      * These methods were originally in `CoolSeqTool`
  * sources (formerly `data_sources`)
    * `MANETranscriptMappings`
    * `TranscriptMappings`
    * `UTADatabase`
  • Loading branch information
korikuzma authored Oct 12, 2023
1 parent 3aa73de commit 1424f1b
Show file tree
Hide file tree
Showing 33 changed files with 1,255 additions and 1,632 deletions.
1 change: 0 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ hgvs = "*"
pydantic = "*"
fastapi = "*"
uvicorn = "*"
gene-normalizer = ">=0.1.34, != 0.2.0, != 0.2.1, != 0.2.2, != 0.2.3, != 0.2.4, != 0.2.5, != 0.2.6, != 0.2.7, != 0.2.8"
"ga4gh.vrs" = "*"

[dev-packages]
Expand Down
3 changes: 0 additions & 3 deletions cool_seq_tool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""The cool_seq_tool package"""
from os import environ
from pathlib import Path
import logging

Expand All @@ -13,5 +12,3 @@
logger.setLevel(logging.DEBUG)

LOG_FN = "cool_seq_tool.log"

from .app import CoolSeqTool # noqa: E402, F401, I202
638 changes: 18 additions & 620 deletions cool_seq_tool/app.py

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions cool_seq_tool/data_sources/__init__.py

This file was deleted.

49 changes: 0 additions & 49 deletions cool_seq_tool/data_sources/gene_normalizer.py

This file was deleted.

2 changes: 2 additions & 0 deletions cool_seq_tool/handlers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"""Module for extending clients"""
from .seqrepo_access import SeqRepoAccess
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
import logging
from typing import Optional, List, Tuple, Union
from os import environ
from pathlib import Path

from ga4gh.vrs.dataproxy import SeqRepoDataProxy

from cool_seq_tool.schemas import ResidueMode
from cool_seq_tool.data_sources.residue_mode import get_inter_residue_pos
from cool_seq_tool.utils import get_inter_residue_pos


logger = logging.getLogger("cool_seq_tool")
logger = logging.getLogger(__name__)


class SeqRepoAccess(SeqRepoDataProxy):
Expand Down Expand Up @@ -139,3 +140,65 @@ def ac_to_chromosome(self, ac: str) -> Tuple[Optional[str], Optional[str]]:
return None, f"Unable to get chromosome for {ac}"
else:
return aliases, None

def get_fasta_file(
    self, sequence_id: str, outfile_path: Path
) -> None:
    """Retrieve FASTA file containing sequence for requested sequence ID.

    The FASTA header depends on the accession prefix: RefSeq-style
    accessions get a ``>refseq:`` header, Ensembl-style accessions get a
    ``>ensembl:`` header, and anything else gets a generic ``>gnl|ID|``
    header. In every case the aliases returned by ``translate_identifier``
    are appended to the header, separated by ``|``. The sequence is wrapped
    at 60 characters per line and no trailing newline is written.

    :param sequence_id: accession ID, sans namespace, eg `NM_152263.3`
    :param outfile_path: path to save file to
    :return: None, but saves sequence data to `outfile_path` if successful
    :raise: KeyError if SeqRepo doesn't have sequence data for the given ID
    """
    # The first element of the lookup result is used as the sequence.
    sequence = self.get_reference_sequence(sequence_id)[0]
    if not sequence:
        raise KeyError(f"No sequence data found for {sequence_id}")

    # Tuples so they can be handed straight to ``str.startswith``.
    refseq_prefixes = (
        "NC_",
        "AC_",
        "NZ_",
        "NT_",
        "NW_",
        "NG_",
        "NM_",
        "XM_",
        "NR_",
        "XR_",
        "NP_",
        "AP_",
        "XP_",
        "YP_",
        "WP_",
    )
    ensembl_prefixes = (
        "ENSE",
        "ENSFM",
        "ENSG",
        "ENSGT",
        "ENSP",
        "ENSR",
        "ENST",
    )

    # ``startswith`` is used instead of fixed-width slicing because the
    # Ensembl prefixes are not all the same length: a four-character slice
    # can never equal "ENSFM" or "ENSGT", so ENSFM accessions used to be
    # labelled with the generic header.
    if sequence_id.startswith(refseq_prefixes):
        target_namespaces = ["ensembl", "ga4gh"]
        header_id = f">refseq:{sequence_id}"
    elif sequence_id.startswith(ensembl_prefixes):
        target_namespaces = ["refseq", "ga4gh"]
        header_id = f">ensembl:{sequence_id}"
    else:
        target_namespaces = ["ensembl", "refseq", "ga4gh"]
        header_id = f">gnl|ID|{sequence_id}"

    # The first element of the translation result is used as the alias list.
    aliases = self.translate_identifier(sequence_id, target_namespaces)
    header = f"{header_id}|{'|'.join(aliases[0])}"

    line_length = 60
    wrapped_sequence = [
        sequence[start: start + line_length]
        for start in range(0, len(sequence), line_length)
    ]
    outfile_path.write_text("\n".join([header] + wrapped_sequence))
4 changes: 4 additions & 0 deletions cool_seq_tool/mappers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Module for mapping data"""
from .alignment import AlignmentMapper
from .mane_transcript import MANETranscript
from .exon_genomic_coords import ExonGenomicCoordsMapper
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
from typing import Optional, Tuple, Dict

from cool_seq_tool.schemas import AnnotationLayer, Assembly, ResidueMode
from cool_seq_tool.data_sources import SeqRepoAccess, TranscriptMappings, \
UTADatabase
from cool_seq_tool.handlers.seqrepo_access import SeqRepoAccess
from cool_seq_tool.sources import TranscriptMappings, UTADatabase


class AlignmentMapper:
Expand Down
Loading

0 comments on commit 1424f1b

Please sign in to comment.