Skip to content

Commit

Permalink
unicode tweaks -
Browse files Browse the repository at this point in the history
Python 2.x can't accept non-ASCII characters in unicode string literals
other than as \u escapes, so check whether a unicode string is ASCII or not.
  • Loading branch information
rocky committed Apr 23, 2023
1 parent c21eebc commit c355f2e
Showing 1 changed file with 21 additions and 9 deletions.
30 changes: 21 additions & 9 deletions xdis/cross_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
Defines types from one set of Python versions that don't exist in
another set of Pythons
"""
# From
# https://stackoverflow.com/questions/196345/how-to-check-if-a-string-in-python-is-in-ascii
def is_ascii(s: str) -> bool:
    """Check if the characters in string s are in ASCII, U+0-U+7F.

    Returns True when every character of ``s`` is in the ASCII range
    (the empty string counts as ASCII), and False otherwise.

    The check encodes to ASCII and catches the failure rather than
    comparing ``len(s)`` with ``len(s.encode())``: UTF-8 encoding raises
    UnicodeEncodeError on a lone surrogate such as "\\ud800", whereas
    here such a string is simply reported as not ASCII.
    """
    try:
        s.encode("ascii")
    except UnicodeEncodeError:
        return False
    return True


class LongTypeForPython3(int):
Expand All @@ -28,9 +33,9 @@ class LongTypeForPython3(int):
# Keep the wrapped value on the instance; __repr__ formats self.value.
def __init__(self, value):
self.value = value

# Returns self.value formatted as text with a trailing "L" appended.
def __repr__(self):
def __repr__(self) -> str:
"""
Replacement __str__ and str() for Python3.
Replacement repr() and str() for Python3.
This ensures we get the "L" suffix on long types.
"""
return f"""{self.value}L"""
Expand All @@ -45,15 +50,22 @@ class UnicodeForPython3(str):
# Keep the wrapped value on the instance; __repr__ decodes and formats
# self.value.
def __init__(self, value):
self.value = value

def __repr__(self):
"""
Replacement __str__ and str() for Python3.
This ensures we get the "u" suffix on unicode types.
def __repr__(self) -> str:
r"""
Replacement repr() and str() for Python3.
This ensures we get the "u" suffix on unicode types,
and also \u when the string is not ASCII representable
"""
try:
value = self.value.decode("utf-8")
utf8_value = self.value.decode("utf-8")
# Do we need to handle utf-16 and utf-32?
except UnicodeDecodeError:
return f"""u'{str(self.value)[1:]}'"""
else:
return f"""u'{str(value)}'"""

if is_ascii(utf8_value):
return f"""u'{utf8_value}'"""

# Turn the unicode character into its Unicode code point,
# but strip of the leading "0x".
unicode_codepoint = hex(ord(utf8_value))[len("0x") :]
return rf"""u'\u{unicode_codepoint}'"""

0 comments on commit c355f2e

Please sign in to comment.