Skip to content

Commit

Permalink
Add test that lone surrogates are not detected or fixed
Browse files Browse the repository at this point in the history
Upgrade simplejson to 3.3.0, because earlier versions raise “JSONDecodeError: Unpaired high surrogate” when decoding JSON that contains lone surrogates
  • Loading branch information
yonran committed Oct 17, 2023
1 parent 2a2ce87 commit 532fba7
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 9 deletions.
13 changes: 5 additions & 8 deletions remoteobjects/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,11 @@
# simplejson >=3.12
from simplejson.errors import errmsg
except ImportError:
try:
# simplejson >=3.1.0, <3.12, before this commit:
# https://github.com/simplejson/simplejson/commit/0d36c5cd16055d55e6eceaf252f072a9339e0746
from simplejson.scanner import errmsg
except ImportError:
# simplejson >=1.1,<3.1.0, before this commit:
# https://github.com/simplejson/simplejson/commit/104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2
from simplejson.decoder import errmsg
# simplejson >=3.1.0, <3.12: errmsg lives in simplejson.scanner, starting with this commit:
# https://github.com/simplejson/simplejson/commit/104b40fcf6aa39d9ba7b240c3c528d1f85e86ef2
# and until this commit (after which it is imported from simplejson.errors):
# https://github.com/simplejson/simplejson/commit/0d36c5cd16055d55e6eceaf252f072a9339e0746
from simplejson.scanner import errmsg
from simplejson.scanner import py_make_scanner
from six import unichr, text_type
import sys
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@
provides=['remoteobjects'],
python_requires='>=2.7, !=3.0.*, !=3.1.*, !=3.2.*',
install_requires=[
'simplejson>=2.0.0',
'simplejson>=3.3.0',
'httplib2>=0.5.0',
'python-dateutil>=2.1',
'six~=1.16.0',
Expand Down
35 changes: 35 additions & 0 deletions tests/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import unittest

from remoteobjects import fields, http
from six import PY2
from tests import test_dataobject
from tests import utils

Expand Down Expand Up @@ -81,6 +82,40 @@ class BasicMost(self.cls):
# Bad characters are replaced with the unicode Replacement Character 0xFFFD.
self.assertEqual(b.value, u"image by \ufffdrew Example")
h.request.assert_called_once_with(**request)
h.reset_mock()

# since simplejson 3.3.0, escaped lone surrogates (\uXXXX escapes in the JSON text) are passed through
# https://github.com/simplejson/simplejson/commit/35816bfe2d0ddeb5ddcc68239683cbb35b7e3ff2
content = """{"name": "lone surrogate \\ud800", "value": "\\udc00 lone surrogate"}"""
h = utils.mock_http(request, content)
b = BasicMost.get('http://example.com/ohhai', http=h)
# Lone surrogates are passed through as lone surrogates in the python unicode value
self.assertEqual(b.name, u"lone surrogate \ud800")
self.assertEqual(b.value, u"\udc00 lone surrogate")
h.request.assert_called_once_with(**request)

content = u"""{"name": "100 \u20AC", "value": "13000 \u00A5"}""".encode('utf-8')
h = utils.mock_http(request, content)
b = BasicMost.get('http://example.com/ohhai', http=h)
# JSON containing non-ascii UTF-8 should be decoded to unicode strings
self.assertEqual(b.name, u"100 \u20AC")
self.assertEqual(b.value, u"13000 \u00A5")
h.request.assert_called_once_with(**request)

content = b"""{"name": "lone surrogate \xed\xa0\x80", "value": "\xed\xb0\x80 lone surrogate"}"""
h = utils.mock_http(request, content)
b = BasicMost.get('http://example.com/ohhai', http=h)
# Raw (unescaped) UTF-8-encoded lone surrogates are handled differently depending on the Python version
if PY2:
# in python2, our JSONDecoder does not detect raw (unescaped) lone surrogates, so they are passed through
self.assertEqual(b.name, u"lone surrogate \ud800")
self.assertEqual(b.value, u"\udc00 lone surrogate")
else:
# in python3, bytes.decode replaces each byte of a UTF-8-encoded lone surrogate with the replacement character U+FFFD
self.assertEqual(b.name, u"lone surrogate \ufffd\ufffd\ufffd")
self.assertEqual(b.value, u"\ufffd\ufffd\ufffd lone surrogate")

h.request.assert_called_once_with(**request)

def test_post(self):

Expand Down

0 comments on commit 532fba7

Please sign in to comment.