From 9334cdc3f9e6e7de0a86117059dc9d8cafcd1f84 Mon Sep 17 00:00:00 2001 From: Adibvafa Date: Fri, 20 Sep 2024 18:03:35 -0400 Subject: [PATCH] Add a list of possible stop symbols. --- CodonTransformer/CodonPrediction.py | 3 ++- CodonTransformer/CodonUtils.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CodonTransformer/CodonPrediction.py b/CodonTransformer/CodonPrediction.py index 41288de..6d3e1b1 100644 --- a/CodonTransformer/CodonPrediction.py +++ b/CodonTransformer/CodonPrediction.py @@ -23,6 +23,7 @@ from CodonTransformer.CodonData import get_merged_seq from CodonTransformer.CodonUtils import ( AMINO_ACIDS, + STOP_SYMBOLS, INDEX2TOKEN, NUM_ORGANISMS, ORGANISM2ID, @@ -148,7 +149,7 @@ def predict_dna_sequence( raise ValueError("Protein sequence cannot be empty.") # Ensure the protein sequence contains only valid amino acids - if not all(aminoacid in AMINO_ACIDS for aminoacid in protein): + if not all(aminoacid in AMINO_ACIDS + STOP_SYMBOLS for aminoacid in protein): raise ValueError("Invalid amino acid found in protein sequence.") # Validate temperature diff --git a/CodonTransformer/CodonUtils.py b/CodonTransformer/CodonUtils.py index dd91e3f..84d8bea 100644 --- a/CodonTransformer/CodonUtils.py +++ b/CodonTransformer/CodonUtils.py @@ -38,6 +38,7 @@ "W", # Tryptophan "Y", # Tyrosine ] +STOP_SYMBOLS = ["_", "*"] # Stop codon symbols # Dictionary ambiguous amino acids to standard amino acids AMBIGUOUS_AMINOACID_MAP: Dict[str, str] = {