diff --git a/Doc/library/email.errors.rst b/Doc/library/email.errors.rst index 511ad163583197..7e51f744673179 100644 --- a/Doc/library/email.errors.rst +++ b/Doc/library/email.errors.rst @@ -59,6 +59,12 @@ The following exception classes are defined in the :mod:`email.errors` module: :class:`~email.mime.image.MIMEImage`). +.. exception:: HeaderWriteError() + + Raised when an error occurs when the :mod:`~email.generator` outputs + headers. + + Here is the list of the defects that the :class:`~email.parser.FeedParser` can find while parsing messages. Note that the defects are added to the message where the problem was found, so for example, if a message nested inside a diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst index 8e7076259810f5..8617b2ed5b31b3 100644 --- a/Doc/library/email.policy.rst +++ b/Doc/library/email.policy.rst @@ -229,6 +229,24 @@ added matters. To illustrate:: .. versionadded:: 3.6 + + .. attribute:: verify_generated_headers + + If ``True`` (the default), the generator will raise + :exc:`~email.errors.HeaderWriteError` instead of writing a header + that is improperly folded or delimited, such that it would + be parsed as multiple headers or joined with adjacent data. + Such headers can be generated by custom header classes or bugs + in the ``email`` module. + + As it's a security feature, this defaults to ``True`` even in the + :class:`~email.policy.Compat32` policy. + For backwards compatible, but unsafe, behavior, it must be set to + ``False`` explicitly. + + .. versionadded:: 3.8.20 + + The following :class:`Policy` method is intended to be called by code using the email library to create policy instances with custom settings: diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index bc9c9b6241d48a..04035b2612d7af 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -92,6 +92,8 @@ ASPECIALS = TSPECIALS | set("*'%") ATTRIBUTE_ENDS = ASPECIALS | WSP EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') +NLSET = {'\n', '\r'} +SPECIALSNL = SPECIALS | NLSET def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' @@ -2611,6 +2613,13 @@ def _refold_parse_tree(parse_tree, *, policy): wrap_as_ew_blocked -= 1 continue tstr = str(part) + if not want_encoding: + if part.token_type == 'ptext': + # Encode if tstr contains special characters. + want_encoding = not SPECIALSNL.isdisjoint(tstr) + else: + # Encode if tstr contains newlines. + want_encoding = not NLSET.isdisjoint(tstr) try: tstr.encode(encoding) charset = encoding diff --git a/Lib/email/_policybase.py b/Lib/email/_policybase.py index c9cbadd2a80c48..d1f48211f90970 100644 --- a/Lib/email/_policybase.py +++ b/Lib/email/_policybase.py @@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): message_factory -- the class to use to create new message objects. If the value is None, the default is Message. + verify_generated_headers + -- if true, the generator verifies that each header + they are properly folded, so that a parser won't + treat it as multiple headers, start-of-body, or + part of another header. + This is a check against custom Header & fold() + implementations. """ raise_on_defect = False @@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta): max_line_length = 78 mangle_from_ = False message_factory = None + verify_generated_headers = True def handle_defect(self, obj, defect): """Based on policy, either raise defect or call register_defect. diff --git a/Lib/email/errors.py b/Lib/email/errors.py index d28a6800104bab..1a0d5c63e60d90 100644 --- a/Lib/email/errors.py +++ b/Lib/email/errors.py @@ -29,6 +29,10 @@ class CharsetError(MessageError): """An illegal charset was given.""" +class HeaderWriteError(MessageError): + """Error while writing headers.""" + + # These are parsing defects which the parser was able to work around. class MessageDefect(ValueError): """Base class for a message defect.""" diff --git a/Lib/email/generator.py b/Lib/email/generator.py index ae670c2353c858..6deb95ba8a13c7 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -14,12 +14,14 @@ from copy import deepcopy from io import StringIO, BytesIO from email.utils import _has_surrogates +from email.errors import HeaderWriteError UNDERSCORE = '_' NL = '\n' # XXX: no longer used by the code below. NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) +NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') @@ -219,7 +221,19 @@ def _dispatch(self, msg): def _write_headers(self, msg): for h, v in msg.raw_items(): - self.write(self.policy.fold(h, v)) + folded = self.policy.fold(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep + if not folded.endswith(self.policy.linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + folded_no_linesep = folded + if folded.endswith(linesep): + folded_no_linesep = folded[:-len(linesep)] + if NEWLINE_WITHOUT_FWSP.search(folded_no_linesep): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self.write(folded) # A blank line always separates headers from body self.write(self._NL) diff --git a/Lib/test/test_email/test_generator.py b/Lib/test/test_email/test_generator.py index c1aeaefab775ff..cdf1075bab8794 100644 --- a/Lib/test/test_email/test_generator.py +++ b/Lib/test/test_email/test_generator.py @@ -5,6 +5,7 @@ from email.message import EmailMessage from email.generator import Generator, BytesGenerator from email import policy +import email.errors from test.test_email import TestEmailBase, parameterize @@ -215,6 +216,44 @@ def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self): g.flatten(msg) self.assertEqual(s.getvalue(), self.typ(expected)) + def test_keep_encoded_newlines(self): + msg = self.msgmaker(self.typ(textwrap.dedent("""\ + To: nobody + Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com + + None + """))) + expected = textwrap.dedent("""\ + To: nobody + Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: injection@example.com + + None + """) + s = self.ioclass() + g = self.genclass(s, policy=self.policy.clone(max_line_length=80)) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + + def test_keep_long_encoded_newlines(self): + msg = self.msgmaker(self.typ(textwrap.dedent("""\ + To: nobody + Subject: Bad subject =?UTF-8?Q?=0A?=Bcc: injection@example.com + + None + """))) + expected = textwrap.dedent("""\ + To: nobody + Subject: Bad subject \n\ + =?utf-8?q?=0A?=Bcc: + injection@example.com + + None + """) + s = self.ioclass() + g = self.genclass(s, policy=self.policy.clone(max_line_length=30)) + g.flatten(msg) + self.assertEqual(s.getvalue(), self.typ(expected)) + class TestGenerator(TestGeneratorBase, TestEmailBase): @@ -223,6 +262,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase): ioclass = io.StringIO typ = str + def test_verify_generated_headers(self): + """gh-121650: by default the generator prevents header injection""" + class LiteralHeader(str): + name = 'Header' + def fold(self, **kwargs): + return self + + for text in ( + 'Value\r\nBad Injection\r\n', + 'NoNewLine' + ): + with self.subTest(text=text): + message = message_from_string( + "Header: Value\r\n\r\nBody", + policy=self.policy, + ) + + del message['Header'] + message['Header'] = LiteralHeader(text) + + with self.assertRaises(email.errors.HeaderWriteError): + message.as_string() + class TestBytesGenerator(TestGeneratorBase, TestEmailBase): diff --git a/Lib/test/test_email/test_headerregistry.py b/Lib/test/test_email/test_headerregistry.py index 08634daa7f6173..d7c1878940df1d 100644 --- a/Lib/test/test_email/test_headerregistry.py +++ b/Lib/test/test_email/test_headerregistry.py @@ -1546,6 +1546,22 @@ def test_set_message_header_from_group(self): class TestFolding(TestHeaderBase): + def test_address_display_names(self): + """Test the folding and encoding of address headers.""" + for name, result in ( + ('Foo Bar, France', '"Foo Bar, France"'), + ('Foo Bar (France)', '"Foo Bar (France)"'), + ('Foo Bar, España', 'Foo =?utf-8?q?Bar=2C_Espa=C3=B1a?='), + ('Foo Bar (España)', 'Foo Bar =?utf-8?b?KEVzcGHDsWEp?='), + ('Foo, Bar España', '=?utf-8?q?Foo=2C_Bar_Espa=C3=B1a?='), + ('Foo, Bar [España]', '=?utf-8?q?Foo=2C_Bar_=5BEspa=C3=B1a=5D?='), + ('Foo Bär, France', 'Foo =?utf-8?q?B=C3=A4r=2C?= France'), + ('Foo Bär ', 'Foo =?utf-8?q?B=C3=A4r_=3CFrance=3E?='), + ): + h = self.make_header('To', Address(name, addr_spec='a@b.com')) + self.assertEqual(h.fold(policy=policy.default), + 'To: %s \n' % result) + def test_short_unstructured(self): h = self.make_header('subject', 'this is a test') self.assertEqual(h.fold(policy=policy.default), diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index 6999e4af10d9b8..76198392e5234e 100644 --- a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase): 'raise_on_defect': False, 'mangle_from_': True, 'message_factory': None, + 'verify_generated_headers': True, } # These default values are the ones set on email.policy.default. # If any of these defaults change, the docs must be updated. @@ -265,6 +266,31 @@ def test_short_maxlen_error(self): with self.assertRaises(email.errors.HeaderParseError): policy.fold("Subject", subject) + def test_verify_generated_headers(self): + """Turning protection off allows header injection""" + policy = email.policy.default.clone(verify_generated_headers=False) + for text in ( + 'Header: Value\r\nBad: Injection\r\n', + 'Header: NoNewLine' + ): + with self.subTest(text=text): + message = email.message_from_string( + "Header: Value\r\n\r\nBody", + policy=policy, + ) + class LiteralHeader(str): + name = 'Header' + def fold(self, **kwargs): + return self + + del message['Header'] + message['Header'] = LiteralHeader(text) + + self.assertEqual( + message.as_string(), + f"{text}\nBody", + ) + # XXX: Need subclassing tests. # For adding subclassed objects, make sure the usual rules apply (subclass # wins), but that the order still works (right overrides left). diff --git a/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst new file mode 100644 index 00000000000000..e09ff63eedc43b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-07-09-11-20-21.bpo-37482.auzvev.rst @@ -0,0 +1 @@ +Fix serialization of display name in originator or destination address fields with both encoded words and special chars. diff --git a/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst new file mode 100644 index 00000000000000..83dd28d4ac575b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-27-16-10-41.gh-issue-121650.nf6oc9.rst @@ -0,0 +1,5 @@ +:mod:`email` headers with embedded newlines are now quoted on output. The +:mod:`~email.generator` will now refuse to serialize (write) headers that +are unsafely folded or delimited; see +:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas +Bloemsaat and Petr Viktorin in :gh:`121650`.)