Skip to content

Commit

Permalink
Better handling for SMILES
Browse files Browse the repository at this point in the history
  • Loading branch information
wukevin committed Sep 10, 2024
1 parent 60a60ff commit 4e5d78a
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
4 changes: 2 additions & 2 deletions chai_lab/data/parsing/input_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

def constituents_of_modified_fasta(x: str) -> list[str] | None:
"""
Accepts RNA/DNA inputs: 'agtc', 'AGT[ASP]TG', etc
Accepts RNA/DNA inputs: 'agtc', 'AGT[ASP]TG', etc. Does not accept SMILES strings.
Returns the constituents, e.g., [A, G, T, ASP, T, G], or None if the string is invalid.
"""
x = x.strip().upper()
Expand Down Expand Up @@ -67,7 +67,7 @@ def identify_potential_entity_types(sequence: str) -> list[EntityType]:
if "U" not in one_letter_constituents:
possible_entity_types.append(EntityType.PROTEIN)

ascii_symbols = string.ascii_letters + string.digits + ".-+=#$%:/\\[]()@"
ascii_symbols = string.ascii_letters + string.digits + ".-+=#$%:/\\[]()<>@"
if set.issubset(set(sequence.upper()), set(ascii_symbols)):
possible_entity_types.append(EntityType.LIGAND)
return possible_entity_types
1 change: 1 addition & 0 deletions tests/example_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"[O-]S(=O)(=O)[O-]",
"CC1=C(C(CCC1)(C)C)/C=C/C(=C/C=C/C(=C/C=O)/C)/C",
"CCC1=C(c2cc3c(c(c4n3[Mg]56[n+]2c1cc7n5c8c(c9[n+]6c(c4)C(C9CCC(=O)OC/C=C(\C)/CCC[C@H](C)CCC[C@H](C)CCCC(C)C)C)[C@H](C(=O)c8c7C)C(=O)OC)C)C=C)C=O",
r"C=CC1=C(C)/C2=C/c3c(C)c(CCC(=O)O)c4n3[Fe@TB16]35<-N2=C1/C=c1/c(C)c(C=C)/c(n13)=C/C1=N->5/C(=C\4)C(CCC(=O)O)=C1C",
# different ions
"[Mg+2]",
"[Na+]",
Expand Down

0 comments on commit 4e5d78a

Please sign in to comment.