diff --git a/Lib/http/cookiejar.py b/Lib/http/cookiejar.py index ed0dd4e394b23a..8a808f42e00a47 100644 --- a/Lib/http/cookiejar.py +++ b/Lib/http/cookiejar.py @@ -1798,9 +1798,10 @@ def load(self, filename=None, ignore_discard=False, ignore_expires=False): if self.filename is not None: filename = self.filename else: raise ValueError(MISSING_FILENAME_TEXT) - # We use latin-1 here because WSGI uses latin-1 for HTTP headers too. - # See gh-87888 for more info. - with open(filename, encoding="latin1") as f: + # cookie value should be ASCII, but cookiejar file may contain + # non-ASCII comments or invalid cookies. + # We use "surrogateescape" error handler to read them. + with open(filename, encoding="ascii", errors="surrogateescape") as f: self._really_load(f, filename, ignore_discard, ignore_expires) def revert(self, filename=None, @@ -1894,7 +1895,7 @@ def save(self, filename=None, ignore_discard=False, ignore_expires=False): with os.fdopen( os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600), - 'w', encoding="latin1", + 'w', encoding="ascii", errors="surrogateescape", ) as f: # There really isn't an LWP Cookies 2.0 format, but this indicates # that there is extra information in here (domain_dot and @@ -2088,7 +2089,7 @@ def save(self, filename=None, ignore_discard=False, ignore_expires=False): with os.fdopen( os.open(filename, os.O_CREAT | os.O_WRONLY | os.O_TRUNC, 0o600), - 'w', encoding="latin1", + 'w', encoding="ascii", errors="surrogateescape", ) as f: f.write(NETSCAPE_HEADER_TEXT) now = time.time() diff --git a/Misc/NEWS.d/next/Library/2022-05-06-20-32-41.gh-issue-87888.2_R3zS.rst b/Misc/NEWS.d/next/Library/2022-05-06-20-32-41.gh-issue-87888.2_R3zS.rst index fc794696b2e348..7f89bd9706d1ab 100644 --- a/Misc/NEWS.d/next/Library/2022-05-06-20-32-41.gh-issue-87888.2_R3zS.rst +++ b/Misc/NEWS.d/next/Library/2022-05-06-20-32-41.gh-issue-87888.2_R3zS.rst @@ -1,2 +1,2 @@ Changed encoding used by :class:`http.cookiejar.FileCookieJar` and its -subclasses from 
locale encoding to "latin-1". +subclasses from locale encoding to "ASCII" with the "surrogateescape" error handler for both reading and writing.