From 7f15063ad1d0f39f3df43fc54cfa1bc31fe0cbb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 17:53:17 +0200
Subject: [PATCH 01/16] improve performance of `fnmatch.translate`

---
 Lib/fnmatch.py | 94 ++++++++++++++++++++++++--------------------------
 1 file changed, 46 insertions(+), 48 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 73acb1fe8d4106..f54d2324ae0b7b 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -9,6 +9,7 @@
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN.  (It does not compile it.)
 """
+
 import os
 import posixpath
 import re
@@ -77,23 +78,38 @@ def translate(pat):
     There is no way to quote meta-characters.
     """
 
-    STAR = object()
-    parts = _translate(pat, STAR, '.')
-    return _join_translated_parts(parts, STAR)
+    parts, indices = _translate(pat, '.')
+    return _join_translated_parts(parts, indices)
 
+_set_ops_re = re.compile(r'([&~|])')
 
-def _translate(pat, STAR, QUESTION_MARK):
+def _translate(pat, QUESTION_MARK):
     res = []
     add = res.append
+    indices = []
+    pending = []  # pending characters to escape
+
     i, n = 0, len(pat)
     while i < n:
         c = pat[i]
         i = i+1
         if c == '*':
+            if pending:
+                add(re.escape(''.join(pending)))
+                pending = []
+            # store the position of the wildcard
+            indices.append(len(parts))
+            add('*')
             # compress consecutive `*` into one
-            if (not res) or res[-1] is not STAR:
-                add(STAR)
+            while i < n and pat[i] == '*':
+                i += 1
         elif c == '?':
+            # Handling '?' one at a time seems to more efficient
+            # even if there are consecutive '?' that could have
+            # been written directly.
+            if pending:
+                add(re.escape(''.join(pending)))
+                pending = []
             add(QUESTION_MARK)
         elif c == '[':
             j = i
@@ -104,8 +120,11 @@ def _translate(pat, STAR, QUESTION_MARK):
             while j < n and pat[j] != ']':
                 j = j+1
             if j >= n:
-                add('\\[')
+                pending.append('[')
             else:
+                if pending:
+                    add(re.escape(''.join(pending)))
+                    pending = []
                 stuff = pat[i:j]
                 if '-' not in stuff:
                     stuff = stuff.replace('\\', r'\\')
@@ -133,8 +152,6 @@ def _translate(pat, STAR, QUESTION_MARK):
                     # Hyphens that create ranges shouldn't be escaped.
                     stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                      for s in chunks)
-                # Escape set operations (&&, ~~ and ||).
-                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                 i = j+1
                 if not stuff:
                     # Empty range: never match.
@@ -143,50 +160,31 @@ def _translate(pat, STAR, QUESTION_MARK):
                     # Negated empty range: match any character.
                     add('.')
                 else:
+                    # Escape set operations (&&, ~~ and ||).
+                    stuff = _set_ops_re.sub(r'\\\1', stuff)
                     if stuff[0] == '!':
                         stuff = '^' + stuff[1:]
                     elif stuff[0] in ('^', '['):
                         stuff = '\\' + stuff
                     add(f'[{stuff}]')
         else:
-            add(re.escape(c))
-    assert i == n
-    return res
-
-
-def _join_translated_parts(inp, STAR):
-    # Deal with STARs.
-    res = []
-    add = res.append
-    i, n = 0, len(inp)
-    # Fixed pieces at the start?
-    while i < n and inp[i] is not STAR:
-        add(inp[i])
-        i += 1
-    # Now deal with STAR fixed STAR fixed ...
-    # For an interior `STAR fixed` pairing, we want to do a minimal
-    # .*? match followed by `fixed`, with no possibility of backtracking.
-    # Atomic groups ("(?>...)") allow us to spell that directly.
-    # Note: people rely on the undocumented ability to join multiple
-    # translate() results together via "|" to build large regexps matching
-    # "one of many" shell patterns.
-    while i < n:
-        assert inp[i] is STAR
-        i += 1
-        if i == n:
-            add(".*")
-            break
-        assert inp[i] is not STAR
-        fixed = []
-        while i < n and inp[i] is not STAR:
-            fixed.append(inp[i])
-            i += 1
-        fixed = "".join(fixed)
-        if i == n:
-            add(".*")
-            add(fixed)
-        else:
-            add(f"(?>.*?{fixed})")
+            pending.append(c)
+    if pending:
+        add(re.escape(''.join(pending)))
     assert i == n
-    res = "".join(res)
+    return parts, indices
+
+
+def _join_translated_parts(parts, indices):
+    if not indices:
+        return fr'(?s:{"".join(parts)})\Z'
+    iter_indices = iter(indices)
+    i, j = 0, next(iter_indices)
+    buffer = parts[i:j]
+    i = j + 1
+    for j in iter_indices:
+        buffer.append(f'(?>.*?{"".join(parts[i:j])})')
+        i = j + 1
+    buffer.append(f'.*{"".join(parts[i:])}')
+    res = ''.join(buffer)
     return fr'(?s:{res})\Z'

From 83d0904e3ef34cbc3a3980813e4c9e4d9989791f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 18:05:12 +0200
Subject: [PATCH 02/16] add tests

---
 Lib/test/test_fnmatch.py | 65 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 64 insertions(+), 1 deletion(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 10ed496d4e2f37..a4bf4c56783e71 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -1,5 +1,4 @@
 """Test cases for the fnmatch module."""
-
 import unittest
 import os
 import string
@@ -250,6 +249,70 @@ def test_translate(self):
         self.assertTrue(re.match(fatre, 'cbabcaxc'))
         self.assertFalse(re.match(fatre, 'dabccbad'))
 
+    def test_translate_wildcards(self):
+        for pattern, expect in [
+            ('ab*', r'(?s:ab.*)\Z'),
+            ('ab*cd', r'(?s:ab.*cd)\Z'),
+            ('ab*cd*', r'(?s:ab(?>.*?cd).*)\Z'),
+            ('ab*cd*12', r'(?s:ab(?>.*?cd).*12)\Z'),
+            ('ab*cd*12*', r'(?s:ab(?>.*?cd)(?>.*?12).*)\Z'),
+            ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\Z'),
+            ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+        ]:
+            translated = translate(pattern)
+            self.assertEqual(translated, expect, pattern)
+
+        for pattern, expect in [
+            ('*ab', r'(?s:.*ab)\Z'),
+            ('*ab*', r'(?s:(?>.*?ab).*)\Z'),
+            ('*ab*cd', r'(?s:(?>.*?ab).*cd)\Z'),
+            ('*ab*cd*', r'(?s:(?>.*?ab)(?>.*?cd).*)\Z'),
+            ('*ab*cd*12', r'(?s:(?>.*?ab)(?>.*?cd).*12)\Z'),
+            ('*ab*cd*12*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*)\Z'),
+            ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\Z'),
+            ('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
+        ]:
+            translated = translate(pattern)
+            self.assertEqual(translated, expect, pattern)
+
+    def test_translate_expressions(self):
+        for pattern, expect in [
+            ('[', r'(?s:\[)\Z'),
+            ('[!', r'(?s:\[!)\Z'),
+            ('[]', r'(?s:\[\])\Z'),
+            ('[abc', r'(?s:\[abc)\Z'),
+            ('[!abc', r'(?s:\[!abc)\Z'),
+            ('[abc]', r'(?s:[abc])\Z'),
+            ('[!abc]', r'(?s:[^abc])\Z'),
+            ('[!abc][!def]', r'(?s:[^abc][^def])\Z'),
+            # with [[
+            ('[[', r'(?s:\[\[)\Z'),
+            ('[[a', r'(?s:\[\[a)\Z'),
+            ('[[]', r'(?s:[\[])\Z'),
+            ('[[]a', r'(?s:[\[]a)\Z'),
+            ('[[]]', r'(?s:[\[]\])\Z'),
+            ('[[]a]', r'(?s:[\[]a\])\Z'),
+            ('[[a]', r'(?s:[\[a])\Z'),
+            ('[[a]]', r'(?s:[\[a]\])\Z'),
+            ('[[a]b', r'(?s:[\[a]b)\Z'),
+            # backslashes
+            ('[\\', r'(?s:\[\\)\Z'),
+            (r'[\]', r'(?s:[\\])\Z'),
+            (r'[\\]', r'(?s:[\\\\])\Z'),
+        ]:
+            translated = translate(pattern)
+            self.assertEqual(translated, expect, pattern)
+
+    def test_indices_locations(self):
+        from fnmatch import _translate
+
+        blocks = ['a^b', '***', '?', '?', '[a-z]', '[1-9]', '*', '++', '[[a']
+        parts, indices = _translate(''.join(blocks), '.')
+        expect_parts = [r'a\^b', '*', '.', '.', '[a-z]', '[1-9]', '*', r'\+\+\[\[a']
+        self.assertListEqual(parts, expect_parts)
+        self.assertListEqual(indices, [1, 6])
+
+
 class FilterTestCase(unittest.TestCase):
 
     def test_filter(self):

From 275a1c78624444cc0c10625e219f0791c30168c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 18:06:57 +0200
Subject: [PATCH 03/16] blurb

---
 Lib/fnmatch.py                                               | 5 ++---
 Lib/test/test_fnmatch.py                                     | 1 +
 .../Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst   | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index f54d2324ae0b7b..301c0f7963ef20 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -9,7 +9,6 @@
 The function translate(PATTERN) returns a regular expression
 corresponding to PATTERN.  (It does not compile it.)
 """
-
 import os
 import posixpath
 import re
@@ -98,7 +97,7 @@ def _translate(pat, QUESTION_MARK):
                 add(re.escape(''.join(pending)))
                 pending = []
             # store the position of the wildcard
-            indices.append(len(parts))
+            indices.append(len(res))
             add('*')
             # compress consecutive `*` into one
             while i < n and pat[i] == '*':
@@ -172,7 +171,7 @@ def _translate(pat, QUESTION_MARK):
     if pending:
         add(re.escape(''.join(pending)))
     assert i == n
-    return parts, indices
+    return res, indices
 
 
 def _join_translated_parts(parts, indices):
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index a4bf4c56783e71..4a53883811918b 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -1,4 +1,5 @@
 """Test cases for the fnmatch module."""
+
 import unittest
 import os
 import string
diff --git a/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst b/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
new file mode 100644
index 00000000000000..830a4c21c73e1c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
@@ -0,0 +1,2 @@
+Improve the performances of :func:`fnmatch.translate` by a factor 1.3. Patch
+by Bénédikt Tran.

From e60d057bccf37c24b4723eddee7cc66dc9c9c48e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 18:20:06 +0200
Subject: [PATCH 04/16] fix usages

---
 Lib/fnmatch.py           | 6 +++---
 Lib/glob.py              | 3 ++-
 Lib/test/test_fnmatch.py | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 301c0f7963ef20..2d71478cf641f8 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -77,12 +77,12 @@ def translate(pat):
     There is no way to quote meta-characters.
     """
 
-    parts, indices = _translate(pat, '.')
+    parts, indices = _translate(pat, '*', '.')
     return _join_translated_parts(parts, indices)
 
 _set_ops_re = re.compile(r'([&~|])')
 
-def _translate(pat, QUESTION_MARK):
+def _translate(pat, STAR, QUESTION_MARK):
     res = []
     add = res.append
     indices = []
@@ -98,7 +98,7 @@ def _translate(pat, QUESTION_MARK):
                 pending = []
             # store the position of the wildcard
             indices.append(len(res))
-            add('*')
+            add(STAR)
             # compress consecutive `*` into one
             while i < n and pat[i] == '*':
                 i += 1
diff --git a/Lib/glob.py b/Lib/glob.py
index 574e5ad51b601d..7aa70cd67576bd 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -312,7 +312,8 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
             if part:
                 if not include_hidden and part[0] in '*?':
                     results.append(r'(?!\.)')
-                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep))
+                parts, _ = fnmatch._translate(part, f'{not_sep}*', not_sep)
+                results.extend(parts)
             if idx < last_part_idx:
                 results.append(any_sep)
     res = ''.join(results)
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 4a53883811918b..df5d57322f4886 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -308,7 +308,7 @@ def test_indices_locations(self):
         from fnmatch import _translate
 
         blocks = ['a^b', '***', '?', '?', '[a-z]', '[1-9]', '*', '++', '[[a']
-        parts, indices = _translate(''.join(blocks), '.')
+        parts, indices = _translate(''.join(blocks), '*', '.')
         expect_parts = [r'a\^b', '*', '.', '.', '[a-z]', '[1-9]', '*', r'\+\+\[\[a']
         self.assertListEqual(parts, expect_parts)
         self.assertListEqual(indices, [1, 6])

From 03217d7b6dec482a4caa3c2d79b5fbb2041da75b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 18:43:14 +0200
Subject: [PATCH 05/16] keep legacy version for glob

---
 Lib/glob.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/Lib/glob.py b/Lib/glob.py
index 7aa70cd67576bd..6f060e359415de 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -312,14 +312,85 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
             if part:
                 if not include_hidden and part[0] in '*?':
                     results.append(r'(?!\.)')
-                parts, _ = fnmatch._translate(part, f'{not_sep}*', not_sep)
-                results.extend(parts)
+                results.extend(_translate(part, f'{not_sep}*', not_sep))
             if idx < last_part_idx:
                 results.append(any_sep)
     res = ''.join(results)
     return fr'(?s:{res})\Z'
 
 
+def _translate(pat, STAR, QUESTION_MARK):
+    res = []
+    add = res.append
+    i, n = 0, len(pat)
+    while i < n:
+        c = pat[i]
+        i = i+1
+        if c == '*':
+            # compress consecutive `*` into one
+            if (not res) or res[-1] is not STAR:
+                add(STAR)
+        elif c == '?':
+            add(QUESTION_MARK)
+        elif c == '[':
+            j = i
+            if j < n and pat[j] == '!':
+                j = j+1
+            if j < n and pat[j] == ']':
+                j = j+1
+            while j < n and pat[j] != ']':
+                j = j+1
+            if j >= n:
+                add('\\[')
+            else:
+                stuff = pat[i:j]
+                if '-' not in stuff:
+                    stuff = stuff.replace('\\', r'\\')
+                else:
+                    chunks = []
+                    k = i+2 if pat[i] == '!' else i+1
+                    while True:
+                        k = pat.find('-', k, j)
+                        if k < 0:
+                            break
+                        chunks.append(pat[i:k])
+                        i = k+1
+                        k = k+3
+                    chunk = pat[i:j]
+                    if chunk:
+                        chunks.append(chunk)
+                    else:
+                        chunks[-1] += '-'
+                    # Remove empty ranges -- invalid in RE.
+                    for k in range(len(chunks)-1, 0, -1):
+                        if chunks[k-1][-1] > chunks[k][0]:
+                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
+                            del chunks[k]
+                    # Escape backslashes and hyphens for set difference (--).
+                    # Hyphens that create ranges shouldn't be escaped.
+                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
+                                     for s in chunks)
+                # Escape set operations (&&, ~~ and ||).
+                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
+                i = j+1
+                if not stuff:
+                    # Empty range: never match.
+                    add('(?!)')
+                elif stuff == '!':
+                    # Negated empty range: match any character.
+                    add('.')
+                else:
+                    if stuff[0] == '!':
+                        stuff = '^' + stuff[1:]
+                    elif stuff[0] in ('^', '['):
+                        stuff = '\\' + stuff
+                    add(f'[{stuff}]')
+        else:
+            add(re.escape(c))
+    assert i == n
+    return res
+
+
 @functools.lru_cache(maxsize=512)
 def _compile_pattern(pat, sep, case_sensitive, recursive=True):
     """Compile given glob pattern to a re.Pattern object (observing case

From 804da13fae3ee48f93a5064273af3401a3c8334d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Thu, 25 Jul 2024 18:48:18 +0200
Subject: [PATCH 06/16] drop the legacy glob._translate copy; reuse fnmatch._translate

---
 Lib/glob.py | 74 +----------------------------------------------------
 1 file changed, 1 insertion(+), 73 deletions(-)

diff --git a/Lib/glob.py b/Lib/glob.py
index 6f060e359415de..fe082444c0ec2a 100644
--- a/Lib/glob.py
+++ b/Lib/glob.py
@@ -312,85 +312,13 @@ def translate(pat, *, recursive=False, include_hidden=False, seps=None):
             if part:
                 if not include_hidden and part[0] in '*?':
                     results.append(r'(?!\.)')
-                results.extend(_translate(part, f'{not_sep}*', not_sep))
+                results.extend(fnmatch._translate(part, f'{not_sep}*', not_sep)[0])
             if idx < last_part_idx:
                 results.append(any_sep)
     res = ''.join(results)
     return fr'(?s:{res})\Z'
 
 
-def _translate(pat, STAR, QUESTION_MARK):
-    res = []
-    add = res.append
-    i, n = 0, len(pat)
-    while i < n:
-        c = pat[i]
-        i = i+1
-        if c == '*':
-            # compress consecutive `*` into one
-            if (not res) or res[-1] is not STAR:
-                add(STAR)
-        elif c == '?':
-            add(QUESTION_MARK)
-        elif c == '[':
-            j = i
-            if j < n and pat[j] == '!':
-                j = j+1
-            if j < n and pat[j] == ']':
-                j = j+1
-            while j < n and pat[j] != ']':
-                j = j+1
-            if j >= n:
-                add('\\[')
-            else:
-                stuff = pat[i:j]
-                if '-' not in stuff:
-                    stuff = stuff.replace('\\', r'\\')
-                else:
-                    chunks = []
-                    k = i+2 if pat[i] == '!' else i+1
-                    while True:
-                        k = pat.find('-', k, j)
-                        if k < 0:
-                            break
-                        chunks.append(pat[i:k])
-                        i = k+1
-                        k = k+3
-                    chunk = pat[i:j]
-                    if chunk:
-                        chunks.append(chunk)
-                    else:
-                        chunks[-1] += '-'
-                    # Remove empty ranges -- invalid in RE.
-                    for k in range(len(chunks)-1, 0, -1):
-                        if chunks[k-1][-1] > chunks[k][0]:
-                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
-                            del chunks[k]
-                    # Escape backslashes and hyphens for set difference (--).
-                    # Hyphens that create ranges shouldn't be escaped.
-                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
-                                     for s in chunks)
-                # Escape set operations (&&, ~~ and ||).
-                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
-                i = j+1
-                if not stuff:
-                    # Empty range: never match.
-                    add('(?!)')
-                elif stuff == '!':
-                    # Negated empty range: match any character.
-                    add('.')
-                else:
-                    if stuff[0] == '!':
-                        stuff = '^' + stuff[1:]
-                    elif stuff[0] in ('^', '['):
-                        stuff = '\\' + stuff
-                    add(f'[{stuff}]')
-        else:
-            add(re.escape(c))
-    assert i == n
-    return res
-
-
 @functools.lru_cache(maxsize=512)
 def _compile_pattern(pat, sep, case_sensitive, recursive=True):
     """Compile given glob pattern to a re.Pattern object (observing case

From baa6ce37b3f5908d311e7342f7314896c07edb34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sat, 17 Aug 2024 12:44:44 +0200
Subject: [PATCH 07/16] reduce the number of calls to `str.join`

---
 Lib/fnmatch.py           | 25 +++++++++----------------
 Lib/test/test_fnmatch.py |  6 ++++--
 2 files changed, 13 insertions(+), 18 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 2d71478cf641f8..34899facba5f63 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -81,21 +81,18 @@ def translate(pat):
     return _join_translated_parts(parts, indices)
 
 _set_ops_re = re.compile(r'([&~|])')
+_re_escape = functools.lru_cache(maxsize=32768)(re.escape)
 
 def _translate(pat, STAR, QUESTION_MARK):
     res = []
     add = res.append
     indices = []
-    pending = []  # pending characters to escape
 
     i, n = 0, len(pat)
     while i < n:
         c = pat[i]
         i = i+1
         if c == '*':
-            if pending:
-                add(re.escape(''.join(pending)))
-                pending = []
             # store the position of the wildcard
             indices.append(len(res))
             add(STAR)
@@ -106,9 +103,6 @@ def _translate(pat, STAR, QUESTION_MARK):
             # Handling '?' one at a time seems to more efficient
             # even if there are consecutive '?' that could have
             # been written directly.
-            if pending:
-                add(re.escape(''.join(pending)))
-                pending = []
             add(QUESTION_MARK)
         elif c == '[':
             j = i
@@ -119,11 +113,8 @@ def _translate(pat, STAR, QUESTION_MARK):
             while j < n and pat[j] != ']':
                 j = j+1
             if j >= n:
-                pending.append('[')
+                add('\\[')
             else:
-                if pending:
-                    add(re.escape(''.join(pending)))
-                    pending = []
                 stuff = pat[i:j]
                 if '-' not in stuff:
                     stuff = stuff.replace('\\', r'\\')
@@ -167,9 +158,7 @@ def _translate(pat, STAR, QUESTION_MARK):
                         stuff = '\\' + stuff
                     add(f'[{stuff}]')
         else:
-            pending.append(c)
-    if pending:
-        add(re.escape(''.join(pending)))
+            add(_re_escape(c))
     assert i == n
     return res, indices
 
@@ -180,10 +169,14 @@ def _join_translated_parts(parts, indices):
     iter_indices = iter(indices)
     i, j = 0, next(iter_indices)
     buffer = parts[i:j]
+    append, extend = buffer.append, buffer.extend
     i = j + 1
     for j in iter_indices:
-        buffer.append(f'(?>.*?{"".join(parts[i:j])})')
+        append('(?>.*?')
+        extend(parts[i:j])
+        append(')')
         i = j + 1
-    buffer.append(f'.*{"".join(parts[i:])}')
+    append('.*')
+    extend(parts[i:])
     res = ''.join(buffer)
     return fr'(?s:{res})\Z'
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index df5d57322f4886..6dc3dc583070f6 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -309,9 +309,11 @@ def test_indices_locations(self):
 
         blocks = ['a^b', '***', '?', '?', '[a-z]', '[1-9]', '*', '++', '[[a']
         parts, indices = _translate(''.join(blocks), '*', '.')
-        expect_parts = [r'a\^b', '*', '.', '.', '[a-z]', '[1-9]', '*', r'\+\+\[\[a']
+        expect_parts = ['a', r'\^', 'b', '*',
+                        '.', '.', '[a-z]', '[1-9]', '*',
+                        r'\+', r'\+', r'\[', r'\[', 'a']
         self.assertListEqual(parts, expect_parts)
-        self.assertListEqual(indices, [1, 6])
+        self.assertListEqual(indices, [3, 8])
 
 
 class FilterTestCase(unittest.TestCase):

From 80b22e005087f23cac1551d0daabae4cc00f3f34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Sun, 18 Aug 2024 12:09:32 +0200
Subject: [PATCH 08/16] micro-optimization on `re.sub`

---
 Lib/fnmatch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 34899facba5f63..30b3de2c64b5a8 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -80,7 +80,7 @@ def translate(pat):
     parts, indices = _translate(pat, '*', '.')
     return _join_translated_parts(parts, indices)
 
-_set_ops_re = re.compile(r'([&~|])')
+_re_setops_sub = re.compile(r'([&~|])').sub
 _re_escape = functools.lru_cache(maxsize=32768)(re.escape)
 
 def _translate(pat, STAR, QUESTION_MARK):
@@ -151,7 +151,7 @@ def _translate(pat, STAR, QUESTION_MARK):
                     add('.')
                 else:
                     # Escape set operations (&&, ~~ and ||).
-                    stuff = _set_ops_re.sub(r'\\\1', stuff)
+                    stuff = _re_setops_sub(r'\\\1', stuff)
                     if stuff[0] == '!':
                         stuff = '^' + stuff[1:]
                     elif stuff[0] in ('^', '['):

From 7a9a87ce23a5599274a5426ac7103d001eb6c073 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Mon, 19 Aug 2024 09:25:45 +0200
Subject: [PATCH 09/16] address Barney's review

---
 Lib/fnmatch.py           | 11 +++++++++--
 Lib/test/test_fnmatch.py | 15 +++++++++------
 2 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 30b3de2c64b5a8..483e1445f280df 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -167,11 +167,18 @@ def _join_translated_parts(parts, indices):
     if not indices:
         return fr'(?s:{"".join(parts)})\Z'
     iter_indices = iter(indices)
-    i, j = 0, next(iter_indices)
-    buffer = parts[i:j]
+    j = next(iter_indices)
+    buffer = parts[:j]  # fixed pieces at the start
     append, extend = buffer.append, buffer.extend
     i = j + 1
     for j in iter_indices:
+        # Now deal with STAR fixed STAR fixed ...
+        # For an interior `STAR fixed` pairing, we want to do a minimal
+        # .*? match followed by `fixed`, with no possibility of backtracking.
+        # Atomic groups ("(?>...)") allow us to spell that directly.
+        # Note: people rely on the undocumented ability to join multiple
+        # translate() results together via "|" to build large regexps matching
+        # "one of many" shell patterns.
         append('(?>.*?')
         extend(parts[i:j])
         append(')')
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 6dc3dc583070f6..68526cf11ed318 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -260,8 +260,9 @@ def test_translate_wildcards(self):
             ('ab*cd*12*34', r'(?s:ab(?>.*?cd)(?>.*?12).*34)\Z'),
             ('ab*cd*12*34*', r'(?s:ab(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
         ]:
-            translated = translate(pattern)
-            self.assertEqual(translated, expect, pattern)
+            with self.subTest(pattern):
+                translated = translate(pattern)
+                self.assertEqual(translated, expect, pattern)
 
         for pattern, expect in [
             ('*ab', r'(?s:.*ab)\Z'),
@@ -273,8 +274,9 @@ def test_translate_wildcards(self):
             ('*ab*cd*12*34', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12).*34)\Z'),
             ('*ab*cd*12*34*', r'(?s:(?>.*?ab)(?>.*?cd)(?>.*?12)(?>.*?34).*)\Z'),
         ]:
-            translated = translate(pattern)
-            self.assertEqual(translated, expect, pattern)
+            with self.subTest(pattern):
+                translated = translate(pattern)
+                self.assertEqual(translated, expect, pattern)
 
     def test_translate_expressions(self):
         for pattern, expect in [
@@ -301,8 +303,9 @@ def test_translate_expressions(self):
             (r'[\]', r'(?s:[\\])\Z'),
             (r'[\\]', r'(?s:[\\\\])\Z'),
         ]:
-            translated = translate(pattern)
-            self.assertEqual(translated, expect, pattern)
+            with self.subTest(pattern):
+                translated = translate(pattern)
+                self.assertEqual(translated, expect, pattern)
 
     def test_indices_locations(self):
         from fnmatch import _translate

From 90539bcd17d3bb3eaf65cbfa18be236a88accc94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 23 Aug 2024 10:50:29 +0200
Subject: [PATCH 10/16] Update
 Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst

---
 .../next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst b/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
index 830a4c21c73e1c..26a18afca945d9 100644
--- a/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
+++ b/Misc/NEWS.d/next/Library/2024-07-25-18-06-51.gh-issue-122288.-_xxOR.rst
@@ -1,2 +1,2 @@
-Improve the performances of :func:`fnmatch.translate` by a factor 1.3. Patch
+Improve the performances of :func:`fnmatch.translate` by a factor 1.7. Patch
 by Bénédikt Tran.

From 1d52949cc8300437001c8342cb27441f1d8e3cbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 27 Aug 2024 19:06:53 +0200
Subject: [PATCH 11/16] use lower-case parameter names

---
 Lib/fnmatch.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 483e1445f280df..f1a841305b8a0a 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -83,7 +83,7 @@ def translate(pat):
 _re_setops_sub = re.compile(r'([&~|])').sub
 _re_escape = functools.lru_cache(maxsize=32768)(re.escape)
 
-def _translate(pat, STAR, QUESTION_MARK):
+def _translate(pat, star, question_mark):
     res = []
     add = res.append
     indices = []
@@ -95,7 +95,7 @@ def _translate(pat, STAR, QUESTION_MARK):
         if c == '*':
             # store the position of the wildcard
             indices.append(len(res))
-            add(STAR)
+            add(star)
             # compress consecutive `*` into one
             while i < n and pat[i] == '*':
                 i += 1
@@ -103,7 +103,7 @@ def _translate(pat, STAR, QUESTION_MARK):
             # Handling '?' one at a time seems to more efficient
             # even if there are consecutive '?' that could have
             # been written directly.
-            add(QUESTION_MARK)
+            add(question_mark)
         elif c == '[':
             j = i
             if j < n and pat[j] == '!':

From 02264371c101929b1e7774ce0c103127887c0bb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:35:33 +0200
Subject: [PATCH 12/16] rename variable `indices` to `star_indices`

---
 Lib/fnmatch.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index f1a841305b8a0a..18fcda79f2608e 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -86,7 +86,7 @@ def translate(pat):
 def _translate(pat, star, question_mark):
     res = []
     add = res.append
-    indices = []
+    star_indices = []
 
     i, n = 0, len(pat)
     while i < n:
@@ -94,7 +94,7 @@ def _translate(pat, star, question_mark):
         i = i+1
         if c == '*':
             # store the position of the wildcard
-            indices.append(len(res))
+            star_indices.append(len(res))
             add(star)
             # compress consecutive `*` into one
             while i < n and pat[i] == '*':
@@ -160,18 +160,18 @@ def _translate(pat, star, question_mark):
         else:
             add(_re_escape(c))
     assert i == n
-    return res, indices
+    return res, star_indices
 
 
-def _join_translated_parts(parts, indices):
-    if not indices:
+def _join_translated_parts(parts, star_indices):
+    if not star_indices:
         return fr'(?s:{"".join(parts)})\Z'
-    iter_indices = iter(indices)
-    j = next(iter_indices)
+    iter_star_indices = iter(star_indices)
+    j = next(iter_star_indices)
     buffer = parts[:j]  # fixed pieces at the start
     append, extend = buffer.append, buffer.extend
     i = j + 1
-    for j in iter_indices:
+    for j in iter_star_indices:
         # Now deal with STAR fixed STAR fixed ...
         # For an interior `STAR fixed` pairing, we want to do a minimal
         # .*? match followed by `fixed`, with no possibility of backtracking.

From 01a51734dcd8031935432fb36997353e764fd616 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:35:48 +0200
Subject: [PATCH 13/16] remove ambiguous comment about '?' case

---
 Lib/fnmatch.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 18fcda79f2608e..5b95e6c8e6f01a 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -100,9 +100,6 @@ def _translate(pat, star, question_mark):
             while i < n and pat[i] == '*':
                 i += 1
         elif c == '?':
-            # Handling '?' one at a time seems to more efficient
-            # even if there are consecutive '?' that could have
-            # been written directly.
             add(question_mark)
         elif c == '[':
             j = i

From bb6c3eef37c79f7544e2d5c38c6f7ee19417ad52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:36:48 +0200
Subject: [PATCH 14/16] change the cache size for `re.escape`

The rationale for this change is as follows:

The cached re.escape() wrapper is only called on single Unicode
characters taken from shell patterns; we may heuristically assume
that these are ISO-8859-1 encodable, which requires a cache of
size 256. To also accommodate non-traditional glyphs (or alphabets
with a small number of common glyphs), we double the cache size.
---
 Lib/fnmatch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 5b95e6c8e6f01a..31bc2aa7585ccf 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -81,7 +81,7 @@ def translate(pat):
     return _join_translated_parts(parts, indices)
 
 _re_setops_sub = re.compile(r'([&~|])').sub
-_re_escape = functools.lru_cache(maxsize=32768)(re.escape)
+_re_escape = functools.lru_cache(maxsize=512)(re.escape)
 
 def _translate(pat, star, question_mark):
     res = []

From c14ce4f7abc18ca5c427c1c3da4d4131f3f3984e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Fri, 18 Oct 2024 05:14:30 +0200
Subject: [PATCH 15/16] Update Lib/fnmatch.py

Co-authored-by: Barney Gale <barney.gale@gmail.com>
---
 Lib/fnmatch.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py
index 31bc2aa7585ccf..865baea23467ea 100644
--- a/Lib/fnmatch.py
+++ b/Lib/fnmatch.py
@@ -77,8 +77,8 @@ def translate(pat):
     There is no way to quote meta-characters.
     """
 
-    parts, indices = _translate(pat, '*', '.')
-    return _join_translated_parts(parts, indices)
+    parts, star_indices = _translate(pat, '*', '.')
+    return _join_translated_parts(parts, star_indices)
 
 _re_setops_sub = re.compile(r'([&~|])').sub
 _re_escape = functools.lru_cache(maxsize=512)(re.escape)

From 38d342701d84370b2b2f713934f848d153f091ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?=
 <10796600+picnixz@users.noreply.github.com>
Date: Tue, 22 Oct 2024 14:34:55 +0200
Subject: [PATCH 16/16] Update Lib/test/test_fnmatch.py

---
 Lib/test/test_fnmatch.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py
index 68526cf11ed318..9f360e1dc10f47 100644
--- a/Lib/test/test_fnmatch.py
+++ b/Lib/test/test_fnmatch.py
@@ -307,16 +307,16 @@ def test_translate_expressions(self):
                 translated = translate(pattern)
                 self.assertEqual(translated, expect, pattern)
 
-    def test_indices_locations(self):
+    def test_star_indices_locations(self):
         from fnmatch import _translate
 
         blocks = ['a^b', '***', '?', '?', '[a-z]', '[1-9]', '*', '++', '[[a']
-        parts, indices = _translate(''.join(blocks), '*', '.')
+        parts, star_indices = _translate(''.join(blocks), '*', '.')
         expect_parts = ['a', r'\^', 'b', '*',
                         '.', '.', '[a-z]', '[1-9]', '*',
                         r'\+', r'\+', r'\[', r'\[', 'a']
         self.assertListEqual(parts, expect_parts)
-        self.assertListEqual(indices, [3, 8])
+        self.assertListEqual(star_indices, [3, 8])
 
 
 class FilterTestCase(unittest.TestCase):