From c355f2e06a0ccd6069328ca082d36eb9ff88697b Mon Sep 17 00:00:00 2001
From: rocky <rocky@dusytfeet.com>
Date: Sun, 23 Apr 2023 06:05:54 -0400
Subject: [PATCH] unicode tweaks -

Python 2.x can't accept non-ASCII characters in a unicode string other than
as \u escapes, so check whether a unicode string is ASCII or not.
---
 xdis/cross_types.py | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/xdis/cross_types.py b/xdis/cross_types.py
index 0f1c0195..c6203798 100644
--- a/xdis/cross_types.py
+++ b/xdis/cross_types.py
@@ -17,6 +17,11 @@
 Defines types from one set of Python versions that don't exist in
 another set of Pythons
 """
+# Based on https://stackoverflow.com/questions/196345/how-to-check-if-a-string-in-python-is-in-ascii
+def is_ascii(s: str) -> bool:
+    """Return True if every character of s is ASCII (U+0000-U+007F), else False."""
+    # Compare code points instead of encoding: s.encode() raises on lone surrogates.
+    return all(ord(ch) < 0x80 for ch in s)
 
 
 class LongTypeForPython3(int):
@@ -28,9 +33,9 @@ class LongTypeForPython3(int):
     def __init__(self, value):
         self.value = value
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         """
-        Replacement __str__ and str() for Python3.
+        Replacement for repr() and str() on Python 3; renders self.value.
         This ensures we get the "L" suffix on long types.
         """
         return f"""{self.value}L"""
@@ -45,15 +50,22 @@ class UnicodeForPython3(str):
     def __init__(self, value):
         self.value = value
 
-    def __repr__(self):
-        """
-        Replacement __str__ and str() for Python3.
-        This ensures we get the "u" suffix on unicode types.
+    def __repr__(self) -> str:
+        r"""
+        Replacement repr() for Python3. Renders self.value, decoded as UTF-8,
+        as a Python 2 literal with the "u" prefix; every non-ASCII character
+        is written as a zero-padded \uXXXX (or \UXXXXXXXX) escape.
         """
         try:
-            value = self.value.decode("utf-8")
+            utf8_value = self.value.decode("utf-8")
             # Do we need to handle utf-16 and utf-32?
         except UnicodeDecodeError:
             return f"""u'{str(self.value)[1:]}'"""
-        else:
-            return f"""u'{str(value)}'"""
+
+        if is_ascii(utf8_value):
+            return f"""u'{utf8_value}'"""
+        # ord() takes one character, so escape each; Python 2 needs \uXXXX or \UXXXXXXXX.
+        return "u'" + "".join(
+            c if c < "\x80" else f"\\u{ord(c):04x}" if c <= "\uffff" else f"\\U{ord(c):08x}"
+            for c in utf8_value
+        ) + "'"