Skip to content

Commit

Permalink
refactor!: get_gene_mane_data sorted by descending MANE_status
Browse files Browse the repository at this point in the history
* Now returns MANE Select and then MANE Plus Clinical
  • Loading branch information
korikuzma committed Oct 16, 2023
1 parent cf06d99 commit d87955d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 20 deletions.
10 changes: 2 additions & 8 deletions cool_seq_tool/mappers/mane_transcript.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,6 @@ async def get_mane_transcript(
mane_data = self.mane_transcript_mappings.get_gene_mane_data(g["gene"])
if not mane_data:
return None
mane_data_len = len(mane_data)

# Transcript Priority (Must pass validation checks):
# 1. MANE Select
Expand All @@ -746,9 +745,7 @@ async def get_mane_transcript(
# a. If there is a tie, choose the first-published transcript among
# those transcripts meeting criterion
mane_transcripts = set()
for i in range(mane_data_len):
index = mane_data_len - i - 1
current_mane_data = mane_data[index]
for current_mane_data in mane_data:
mane_transcripts |= set(
(current_mane_data["RefSeq_nuc"], current_mane_data["Ensembl_nuc"])
)
Expand Down Expand Up @@ -957,11 +954,8 @@ async def g_to_mane_c(
mane_data = self.mane_transcript_mappings.get_gene_mane_data(gene)
if not mane_data:
return None
mane_data_len = len(mane_data)

for i in range(mane_data_len):
index = mane_data_len - i - 1
current_mane_data = mane_data[index]
for current_mane_data in mane_data:
mane_c_ac = current_mane_data["RefSeq_nuc"]

# Liftover to GRCh38
Expand Down
8 changes: 4 additions & 4 deletions cool_seq_tool/sources/mane_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,9 @@ def _load_mane_transcript_data(self) -> pl.DataFrame:
def get_gene_mane_data(self, gene_symbol: str) -> Optional[List[Dict]]:
"""Return MANE Transcript data for a gene.
:param str gene_symbol: HGNC Gene Symbol
:return: MANE Transcript data (Transcript accessions,
gene, and location information)
:return: List of MANE Transcript data (Transcript accessions,
gene, and location information). Sorted list: MANE Select and then MANE Plus
Clinical
"""
data = self.df.filter(pl.col("symbol") == gene_symbol.upper())

Expand All @@ -40,8 +41,7 @@ def get_gene_mane_data(self, gene_symbol: str) -> Optional[List[Dict]]:
)
return None

# Ordering: MANE Plus Clinical (If it exists), MANE Select
data = data.sort(by="MANE_status", descending=False)
data = data.sort(by="MANE_status", descending=True)
return data.to_dicts()

def get_mane_from_transcripts(self, transcripts: List[str]) -> List[Dict]:
Expand Down
16 changes: 8 additions & 8 deletions tests/sources/test_mane_transcript_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,23 +97,23 @@ def test_get_gene_mane_data(
# MANE Select and MANE Plus Clinical (BRAF has both)
actual = test_mane_transcript_mappings.get_gene_mane_data("BRAF")
assert len(actual) == 2
assert actual[0] == braf_plus_clinical
assert actual[1] == braf_select
assert actual[0] == braf_select
assert actual[1] == braf_plus_clinical

actual = test_mane_transcript_mappings.get_gene_mane_data("braf")
assert len(actual) == 2
assert actual[0] == braf_plus_clinical
assert actual[1] == braf_select
assert actual[0] == braf_select
assert actual[1] == braf_plus_clinical

# MANE Select and MANE Plus Clinical
actual = test_mane_transcript_mappings.get_gene_mane_data("ERCC6")
assert len(actual) == 2
assert actual[0] == ercc6_plus_clinical
assert actual[1] == ercc6_select
assert actual[0] == ercc6_select
assert actual[1] == ercc6_plus_clinical

actual = test_mane_transcript_mappings.get_gene_mane_data("ercc6")
assert actual[0] == ercc6_plus_clinical
assert actual[1] == ercc6_select
assert actual[0] == ercc6_select
assert actual[1] == ercc6_plus_clinical

# No Matches
actual = test_mane_transcript_mappings.get_gene_mane_data("BRAFF")
Expand Down

0 comments on commit d87955d

Please sign in to comment.