Skip to content

Commit

Permalink
gh-124188: Fix PyErr_ProgramTextObject() (GH-124189)
Browse files (browse the repository at this point in the history)
* Detect source file encoding.
* Use the "replace" error handler even for UTF-8 (default) encoding.
* Remove the BOM.
* Fix detection of too long lines if they contain NUL.
* Return the head rather than the tail for truncated long lines.
  • Loading branch information
serhiy-storchaka authored Sep 24, 2024
1 parent 3c83f99 commit e2f7107
Show file tree
Hide file tree
Showing 6 changed files with 328 additions and 117 deletions.
10 changes: 7 additions & 3 deletions Lib/test/support/script_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,13 @@ def make_script(script_dir, script_basename, source, omit_suffix=False):
if not omit_suffix:
script_filename += os.extsep + 'py'
script_name = os.path.join(script_dir, script_filename)
# The script should be encoded to UTF-8, the default string encoding
with open(script_name, 'w', encoding='utf-8') as script_file:
script_file.write(source)
if isinstance(source, str):
# The script should be encoded to UTF-8, the default string encoding
with open(script_name, 'w', encoding='utf-8') as script_file:
script_file.write(source)
else:
with open(script_name, 'wb') as script_file:
script_file.write(source)
importlib.invalidate_caches()
return script_name

Expand Down
5 changes: 4 additions & 1 deletion Lib/test/test_compiler_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,8 @@ def g():

def test_syntax_error__return_not_in_function(self):
snippet = "return 42"
with self.assertRaisesRegex(SyntaxError, "'return' outside function"):
with self.assertRaisesRegex(SyntaxError, "'return' outside function") as cm:
self.codegen_test(snippet, None)
self.assertIsNone(cm.exception.text)
self.assertEqual(cm.exception.offset, 1)
self.assertEqual(cm.exception.end_offset, 10)
166 changes: 129 additions & 37 deletions Lib/test/test_eof.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""test script for a few new invalid token catches"""

import sys
from codecs import BOM_UTF8
from test import support
from test.support import os_helper
from test.support import script_helper
Expand All @@ -11,67 +12,158 @@ class EOFTestCase(unittest.TestCase):
def test_EOF_single_quote(self):
expect = "unterminated string literal (detected at line 1) (<string>, line 1)"
for quote in ("'", "\""):
try:
with self.assertRaises(SyntaxError) as cm:
eval(f"""{quote}this is a test\
""")
except SyntaxError as msg:
self.assertEqual(str(msg), expect)
self.assertEqual(msg.offset, 1)
else:
raise support.TestFailed
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.offset, 1)

def test_EOFS(self):
expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)")
try:
eval("""'''this is a test""")
except SyntaxError as msg:
self.assertEqual(str(msg), expect)
self.assertEqual(msg.offset, 1)
else:
raise support.TestFailed
expect = ("unterminated triple-quoted string literal (detected at line 3) (<string>, line 1)")
with self.assertRaises(SyntaxError) as cm:
eval("""ä = '''thîs is \na \ntest""")
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
self.assertEqual(cm.exception.offset, 5)

with self.assertRaises(SyntaxError) as cm:
eval("""ä = '''thîs is \na \ntest""".encode())
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
self.assertEqual(cm.exception.offset, 5)

with self.assertRaises(SyntaxError) as cm:
eval(BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
self.assertEqual(cm.exception.offset, 5)

with self.assertRaises(SyntaxError) as cm:
eval("""# coding: latin1\nä = '''thîs is \na \ntest""".encode('latin1'))
self.assertEqual(str(cm.exception), "unterminated triple-quoted string literal (detected at line 4) (<string>, line 2)")
self.assertEqual(cm.exception.text, "ä = '''thîs is ")
self.assertEqual(cm.exception.offset, 5)

def test_EOFS_with_file(self):
expect = ("(<string>, line 1)")
with os_helper.temp_dir() as temp_dir:
file_name = script_helper.make_script(temp_dir, 'foo', """'''this is \na \ntest""")
rc, out, err = script_helper.assert_python_failure(file_name)
self.assertIn(b'unterminated triple-quoted string literal (detected at line 3)', err)
file_name = script_helper.make_script(temp_dir, 'foo',
"""ä = '''thîs is \na \ntest""")
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
" ä = '''thîs is ",
' ^',
'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])

file_name = script_helper.make_script(temp_dir, 'foo',
"""ä = '''thîs is \na \ntest""".encode())
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
" ä = '''thîs is ",
' ^',
'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])

file_name = script_helper.make_script(temp_dir, 'foo',
BOM_UTF8 + """ä = '''thîs is \na \ntest""".encode())
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
" ä = '''thîs is ",
' ^',
'SyntaxError: unterminated triple-quoted string literal (detected at line 3)'])

file_name = script_helper.make_script(temp_dir, 'foo',
"""# coding: latin1\nä = '''thîs is \na \ntest""".encode('latin1'))
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
" ä = '''thîs is ",
' ^',
'SyntaxError: unterminated triple-quoted string literal (detected at line 4)'])

@warnings_helper.ignore_warnings(category=SyntaxWarning)
def test_eof_with_line_continuation(self):
expect = "unexpected EOF while parsing (<string>, line 1)"
try:
with self.assertRaises(SyntaxError) as cm:
compile('"\\Xhh" \\', '<string>', 'exec')
except SyntaxError as msg:
self.assertEqual(str(msg), expect)
else:
raise support.TestFailed
self.assertEqual(str(cm.exception), expect)

def test_line_continuation_EOF(self):
"""A continuation at the end of input must be an error; bpo2180."""
expect = 'unexpected EOF while parsing (<string>, line 1)'
with self.assertRaises(SyntaxError) as excinfo:
exec('x = 5\\')
self.assertEqual(str(excinfo.exception), expect)
with self.assertRaises(SyntaxError) as excinfo:
with self.assertRaises(SyntaxError) as cm:
exec('ä = 5\\')
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
self.assertEqual(cm.exception.offset, 7)

with self.assertRaises(SyntaxError) as cm:
exec('ä = 5\\'.encode())
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
self.assertEqual(cm.exception.offset, 7)

with self.assertRaises(SyntaxError) as cm:
exec('# coding:latin1\nä = 5\\'.encode('latin1'))
self.assertEqual(str(cm.exception),
'unexpected EOF while parsing (<string>, line 2)')
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
self.assertEqual(cm.exception.offset, 7)

with self.assertRaises(SyntaxError) as cm:
exec(BOM_UTF8 + 'ä = 5\\'.encode())
self.assertEqual(str(cm.exception), expect)
self.assertEqual(cm.exception.text, 'ä = 5\\\n')
self.assertEqual(cm.exception.offset, 7)

with self.assertRaises(SyntaxError) as cm:
exec('\\')
self.assertEqual(str(excinfo.exception), expect)
self.assertEqual(str(cm.exception), expect)

@unittest.skipIf(not sys.executable, "sys.executable required")
def test_line_continuation_EOF_from_file_bpo2180(self):
"""Ensure tok_nextc() does not add too many ending newlines."""
with os_helper.temp_dir() as temp_dir:
file_name = script_helper.make_script(temp_dir, 'foo', '\\')
rc, out, err = script_helper.assert_python_failure(file_name)
self.assertIn(b'unexpected EOF while parsing', err)
self.assertIn(b'line 1', err)
self.assertIn(b'\\', err)

file_name = script_helper.make_script(temp_dir, 'foo', 'y = 6\\')
rc, out, err = script_helper.assert_python_failure(file_name)
self.assertIn(b'unexpected EOF while parsing', err)
self.assertIn(b'line 1', err)
self.assertIn(b'y = 6\\', err)
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-2:], [
' \\',
'SyntaxError: unexpected EOF while parsing'])
self.assertEqual(err[-3][-8:], ', line 1', err)

file_name = script_helper.make_script(temp_dir, 'foo', 'ä = 6\\')
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
' ä = 6\\',
' ^',
'SyntaxError: unexpected EOF while parsing'])
self.assertEqual(err[-4][-8:], ', line 1', err)

file_name = script_helper.make_script(temp_dir, 'foo',
'# coding:latin1\n'
'ä = 7\\'.encode('latin1'))
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
' ä = 7\\',
' ^',
'SyntaxError: unexpected EOF while parsing'])
self.assertEqual(err[-4][-8:], ', line 2', err)

file_name = script_helper.make_script(temp_dir, 'foo',
BOM_UTF8 + 'ä = 8\\'.encode())
rc, out, err = script_helper.assert_python_failure('-X', 'utf8', file_name)
err = err.decode().splitlines()
self.assertEqual(err[-3:], [
' ä = 8\\',
' ^',
'SyntaxError: unexpected EOF while parsing'])
self.assertEqual(err[-4][-8:], ', line 1', err)


if __name__ == "__main__":
unittest.main()
Loading

0 comments on commit e2f7107

Please sign in to comment.