From 7f58ab2e27c6b486fa4beb229865c502c7d5055a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 14 Nov 2023 22:02:28 +0200 Subject: [PATCH] gh-111942: Fix SystemError in the TextIOWrapper constructor (#112061) In non-debug mode the check for the "errors" argument is skipped, and then PyUnicode_AsUTF8() can fail, but its result was not checked. Co-authored-by: Victor Stinner (cherry picked from commit 9302f05f9af07332c414b3c19003efd1b1763cf3) --- Lib/test/test_io.py | 4 +--- .../2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst | 2 ++ Modules/_io/textio.c | 8 ++++++-- 3 files changed, 9 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 0f4a56001c0f379..196b7d2b1429ab9 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -2732,9 +2732,7 @@ def test_constructor(self): if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: with self.assertRaises(UnicodeEncodeError): t.__init__(b, encoding="utf-8", errors='\udcfe') - if support.Py_DEBUG or sys.flags.dev_mode: - # TODO: If encoded to UTF-8, should also be checked for - # embedded null characters. + if support.Py_DEBUG or sys.flags.dev_mode or self.is_C: with self.assertRaises(ValueError): t.__init__(b, encoding="utf-8", errors='replace\0') with self.assertRaises(TypeError): diff --git a/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst b/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst new file mode 100644 index 000000000000000..ca58a6fa5d6ae1c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-14-18-43-55.gh-issue-111942.x1pnrj.rst @@ -0,0 +1,2 @@ +Fix SystemError in the TextIOWrapper constructor with non-encodable "errors" +argument in non-debug mode. 
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index d4797ca59ebea23..b58944e547eac54 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1119,6 +1119,10 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, else if (io_check_errors(errors)) { return -1; } + const char *errors_str = _PyUnicode_AsUTF8NoNUL(errors); + if (errors_str == NULL) { + return -1; + } if (validate_newline(newline) < 0) { return -1; @@ -1191,11 +1195,11 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, /* Build the decoder object */ _PyIO_State *state = find_io_state_by_def(Py_TYPE(self)); self->state = state; - if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) + if (_textiowrapper_set_decoder(self, codec_info, errors_str) != 0) goto error; /* Build the encoder object */ - if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0) + if (_textiowrapper_set_encoder(self, codec_info, errors_str) != 0) goto error; /* Finished sorting out the codec details */