Skip to content

Commit

Permalink
gh-125660: Reject invalid unicode escapes for Python implementation of JSON decoder (GH-125683)
Browse files Browse the repository at this point in the history
  • Loading branch information
nineteendo authored Oct 18, 2024
1 parent d358425 commit df75136
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 4 deletions.
9 changes: 5 additions & 4 deletions Lib/json/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,18 @@ def __reduce__(self):
}


# Matches exactly four ASCII hexadecimal digits, i.e. the XXXX part of a
# \uXXXX escape.  An explicit character class is used, so signs, whitespace,
# underscores and non-ASCII digits do not match.
# (FLAGS is defined outside this excerpt.)
HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}', FLAGS)
# Group 1 lazily captures the text before the next terminator; group 2 is
# that terminator: a double quote, a backslash, or a control character in
# the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Maps each single-character escape code to the character it stands for.
BACKSLASH = {
'"': '"', '\\': '\\', '/': '/',
'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}

def _decode_uXXXX(s, pos):
esc = s[pos + 1:pos + 5]
if len(esc) == 4 and esc[1] not in 'xX':
def _decode_uXXXX(s, pos, _m=HEXDIGITS.match):
esc = _m(s, pos + 1)
if esc is not None:
try:
return int(esc, 16)
return int(esc.group(), 16)
except ValueError:
pass
msg = "Invalid \\uXXXX escape"
Expand Down
10 changes: 10 additions & 0 deletions Lib/test/test_json/test_scanstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ def test_bad_escapes(self):
'"\\u012z"',
'"\\u0x12"',
'"\\u0X12"',
'"\\u{0}"'.format("\uff10" * 4),
'"\\u 123"',
'"\\u-123"',
'"\\u+123"',
'"\\u1_23"',
'"\\ud834\\"',
'"\\ud834\\u"',
'"\\ud834\\ud"',
Expand All @@ -127,6 +132,11 @@ def test_bad_escapes(self):
'"\\ud834\\udd2z"',
'"\\ud834\\u0x20"',
'"\\ud834\\u0X20"',
'"\\ud834\\u{0}"'.format("\uff10" * 4),
'"\\ud834\\u 123"',
'"\\ud834\\u-123"',
'"\\ud834\\u+123"',
'"\\ud834\\u1_23"',
]
for s in bad_escapes:
with self.assertRaises(self.JSONDecodeError, msg=s):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reject invalid Unicode escapes in the Python implementation of :func:`json.loads`.

0 comments on commit df75136

Please sign in to comment.