Skip to content

Commit

Permalink
email: Fix RFC 2047 header decoding with line folding
Browse files Browse the repository at this point in the history
  • Loading branch information
srinivasreddy committed Dec 20, 2024
1 parent 39e69a7 commit a4e1f04
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Lib/email/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ def decode_header(header):
for n, w in enumerate(words):
if n>1 and w[1] and words[n-2][1] and words[n-1][0].isspace():
droplist.append(n-1)
if n < len(words):
words[n] = (words[n][0].lstrip(), words[n][1], words[n][2])
for d in reversed(droplist):
del words[d]

Expand Down
75 changes: 75 additions & 0 deletions Lib/test/test_email/test_message.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,5 +1055,80 @@ def test_string_payload_with_multipart_content_type(self):
self.assertEqual(list(attachments), [])


class TestHeaderDecoding(unittest.TestCase):
def test_encoded_word_splitting(self):
# Test case with accented characters that forces line splitting
address = "Bérénice-Amélie Rosemonde Dûbois-Bénard <[email protected]>"
message = EmailMessage()
message["From"] = address
message_bytes = message.as_bytes()

# Test with default policy
parsed = message_from_bytes(message_bytes, policy=policy.default)
self.assertEqual(str(parsed["From"].addresses[0]), address)
self.assertEqual(parsed["From"].addresses[0].display_name,
"Bérénice-Amélie Rosemonde Dûbois-Bénard")

def test_multiple_encoded_words(self):
# Test multiple encoded-words in sequence
headers = [
("From", "André von Müller <[email protected]>"),
("To", "José García López <[email protected]>"),
("Subject", "Re: études à l'université"),
]

message = EmailMessage()
for header, value in headers:
message[header] = value
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
for header, value in headers:
with self.subTest(header=header):
self.assertEqual(str(parsed[header]), value)

def test_long_encoded_words(self):
# Test very long names that force multiple encoded-word splits
long_name = "Maximilian-Friedrich von Württemberg-Höchstadt III"
address = f"{long_name} <[email protected]>"

message = EmailMessage()
message["From"] = address
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
self.assertEqual(str(parsed["From"].addresses[0]), address)
self.assertEqual(parsed["From"].addresses[0].display_name, long_name)

def test_mixed_ascii_and_encoded(self):
# Test mixing ASCII and encoded-words
address = 'ACME Corp (アクメ) <[email protected]>'
message = EmailMessage()
message["From"] = address
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
self.assertEqual(str(parsed["From"].addresses[0]), address)
self.assertEqual(parsed["From"].addresses[0].display_name, 'ACME Corp (アクメ)')

def test_whitespace_handling(self):
# Test various whitespace scenarios between encoded-words
headers = [
("From", "María José <[email protected]>"), # Double space
("To", "André\tvon\tMüller <[email protected]>"), # Tabs
("Cc", "José\n García <[email protected]>"), # Newline
]

message = EmailMessage()
for header, value in headers:
message[header] = value
message_bytes = message.as_bytes()

parsed = message_from_bytes(message_bytes, policy=policy.default)
for header, value in headers:
with self.subTest(header=header):
self.assertEqual(str(parsed[header]), value)


# Run this module's tests when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit a4e1f04

Please sign in to comment.