From 9c7d2b9d83e0ad4e8ffeb8535d8c0a31a0a6a0b8 Mon Sep 17 00:00:00 2001
From: Protobuf Team Bot <protobuf-github-bot@google.com>
Date: Thu, 14 Dec 2023 09:19:05 -0800
Subject: [PATCH] Make the utf8_range implementation just in C

PiperOrigin-RevId: 590961088
---
 php/ext/google/protobuf/config.m4       |   2 +-
 python/convert.c                        |   2 +-
 ruby/.gitignore                         |   4 +-
 ruby/Rakefile                           |   2 +-
 ruby/ext/google/protobuf_c/extconf.rb   |   2 +-
 ruby/lib/google/tasks/ffi.rake          |   4 +-
 third_party/utf8_range/BUILD.bazel      |  15 +-
 third_party/utf8_range/CMakeLists.txt   |   6 +-
 third_party/utf8_range/utf8_range.c     | 467 ++++++++++++++++++++++++
 third_party/utf8_range/utf8_range.h     |  17 +-
 third_party/utf8_range/utf8_validity.cc | 432 +---------------------
 third_party/utf8_range/utf8_validity.h  |   2 +
 upb/wire/internal/decode.h              |  21 +-
 13 files changed, 499 insertions(+), 477 deletions(-)
 create mode 100644 third_party/utf8_range/utf8_range.c

diff --git a/php/ext/google/protobuf/config.m4 b/php/ext/google/protobuf/config.m4
index c5a665b46770..5e5fbf6c083f 100644
--- a/php/ext/google/protobuf/config.m4
+++ b/php/ext/google/protobuf/config.m4
@@ -4,7 +4,7 @@ if test "$PHP_PROTOBUF" != "no"; then
 
   PHP_NEW_EXTENSION(
     protobuf,
-    arena.c array.c convert.c def.c map.c message.c names.c php-upb.c protobuf.c third_party/utf8_range/naive.c third_party/utf8_range/range2-neon.c third_party/utf8_range/range2-sse.c,
+    arena.c array.c convert.c def.c map.c message.c names.c php-upb.c protobuf.c third_party/utf8_range/utf8_range.c,
     $ext_shared, , -std=gnu99 -I@ext_srcdir@/third_party/utf8_range)
   PHP_ADD_BUILD_DIR($ext_builddir/third_party/utf8_range)
 
diff --git a/python/convert.c b/python/convert.c
index 2105c98a634b..0b26bdcabf57 100644
--- a/python/convert.c
+++ b/python/convert.c
@@ -241,7 +241,7 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
         // Use the object's bytes if they are valid UTF-8.
         char* ptr;
         if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
-        if (utf8_range2((const unsigned char*)ptr, size) != 0) {
+        if (!utf8_range_IsValid(ptr, size)) {
           // Invalid UTF-8.  Try to convert the message to a Python Unicode
           // object, even though we know this will fail, just to get the
           // idiomatic Python error message.
diff --git a/ruby/.gitignore b/ruby/.gitignore
index 143b48e92c87..555af6ccb08b 100644
--- a/ruby/.gitignore
+++ b/ruby/.gitignore
@@ -8,8 +8,6 @@ pkg/
 tmp/
 tests/google/
 ext/google/protobuf_c/third_party/utf8_range/utf8_range.h
-ext/google/protobuf_c/third_party/utf8_range/range2-sse.c
-ext/google/protobuf_c/third_party/utf8_range/range2-neon.c
-ext/google/protobuf_c/third_party/utf8_range/naive.c
+ext/google/protobuf_c/third_party/utf8_range/utf8_range.c
 ext/google/protobuf_c/third_party/utf8_range/LICENSE
 lib/google/protobuf/*_pb.rb
\ No newline at end of file
diff --git a/ruby/Rakefile b/ruby/Rakefile
index 860bbc35feb3..9b50e6cb65ef 100644
--- a/ruby/Rakefile
+++ b/ruby/Rakefile
@@ -75,7 +75,7 @@ task :copy_third_party do
     # We need utf8_range in-tree.
     utf8_root = '../third_party/utf8_range'
     %w[
-      utf8_range.h naive.c range2-neon.c range2-neon.c range2-sse.c LICENSE
+      utf8_range.h utf8_range.c LICENSE
     ].each do |file|
       FileUtils.cp File.join(utf8_root, file),
                    "ext/google/protobuf_c/third_party/utf8_range"
diff --git a/ruby/ext/google/protobuf_c/extconf.rb b/ruby/ext/google/protobuf_c/extconf.rb
index 4bb49bb21570..ed812c954945 100755
--- a/ruby/ext/google/protobuf_c/extconf.rb
+++ b/ruby/ext/google/protobuf_c/extconf.rb
@@ -22,7 +22,7 @@
 
 $srcs = ["protobuf.c", "convert.c", "defs.c", "message.c",
          "repeated_field.c", "map.c", "ruby-upb.c", "wrap_memcpy.c",
-         "naive.c", "range2-neon.c", "range2-sse.c", "shared_convert.c",
+         "utf8_range.c", "shared_convert.c",
          "shared_message.c"]
 
 create_makefile(ext_name)
diff --git a/ruby/lib/google/tasks/ffi.rake b/ruby/lib/google/tasks/ffi.rake
index c7b2a8e5470d..5de10a7532b1 100644
--- a/ruby/lib/google/tasks/ffi.rake
+++ b/ruby/lib/google/tasks/ffi.rake
@@ -74,9 +74,7 @@ begin
       FFI::Compiler::CompileTask.new 'protobuf_c_ffi' do |c|
         configure_common_compile_task c
         # Ruby UPB was already compiled with different flags.
-        c.exclude << "/range2-neon.c"
-        c.exclude << "/range2-sse.c"
-        c.exclude << "/naive.c"
+        c.exclude << "/utf8_range.c"
         c.exclude << "/ruby-upb.c"
       end
 
diff --git a/third_party/utf8_range/BUILD.bazel b/third_party/utf8_range/BUILD.bazel
index 439faaa2564e..d24e8a14314c 100644
--- a/third_party/utf8_range/BUILD.bazel
+++ b/third_party/utf8_range/BUILD.bazel
@@ -23,9 +23,7 @@ exports_files([
 filegroup(
     name = "utf8_range_srcs",
     srcs = [
-        "naive.c",
-        "range2-neon.c",
-        "range2-sse.c",
+        "utf8_range.c",
         "utf8_range.h",
     ],
     visibility = ["//:__subpackages__"],
@@ -34,9 +32,7 @@ filegroup(
 cc_library(
     name = "utf8_range",
     srcs = [
-        "naive.c",
-        "range2-neon.c",
-        "range2-sse.c",
+        "utf8_range.c",
     ],
     hdrs = ["utf8_range.h"],
     strip_include_prefix = "/third_party/utf8_range",
@@ -48,14 +44,19 @@ cc_library(
     hdrs = ["utf8_validity.h"],
     strip_include_prefix = "/third_party/utf8_range",
     deps = [
+        ":utf8_range",
         "@com_google_absl//absl/strings",
     ],
 )
 
 cc_test(
     name = "utf8_validity_test",
-    srcs = ["utf8_validity_test.cc"],
+    srcs = [
+        "utf8_range.c",
+        "utf8_validity_test.cc",
+    ],
     deps = [
+        ":utf8_range",
         ":utf8_validity",
         "@com_google_absl//absl/strings",
         "@com_google_googletest//:gtest_main",
diff --git a/third_party/utf8_range/CMakeLists.txt b/third_party/utf8_range/CMakeLists.txt
index 344952d38cf5..8d7a6e15c6c0 100644
--- a/third_party/utf8_range/CMakeLists.txt
+++ b/third_party/utf8_range/CMakeLists.txt
@@ -12,14 +12,12 @@ option (utf8_range_ENABLE_INSTALL "Configure installation" ON)
 ##
 # Create the lightweight C library
 add_library (utf8_range STATIC
-  naive.c
-  range2-neon.c
-  range2-sse.c
+  utf8_range.c
 )
 
 ##
 # A heavier-weight C++ wrapper that supports Abseil.
-add_library (utf8_validity STATIC utf8_validity.cc)
+add_library (utf8_validity STATIC utf8_validity.cc utf8_range.c)
 
 # Load Abseil dependency.
 if (NOT TARGET absl::strings)
diff --git a/third_party/utf8_range/utf8_range.c b/third_party/utf8_range/utf8_range.c
new file mode 100644
index 000000000000..9564b07e0333
--- /dev/null
+++ b/third_party/utf8_range/utf8_range.c
@@ -0,0 +1,467 @@
+// Copyright 2023 Google LLC
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+/* This is a wrapper for the Google range-sse.cc algorithm which checks whether
+ * a sequence of bytes is a valid UTF-8 sequence and finds the longest valid
+ * prefix of the UTF-8 sequence.
+ *
+ * The key difference is that it checks for as much ASCII symbols as possible
+ * and then falls back to the range-sse.cc algorithm. The changes to the
+ * algorithm are cosmetic, mostly to trick the clang compiler to produce optimal
+ * code.
+ *
+ * For API see the utf8_validity.h header.
+ */
+#include "utf8_range.h"
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __SSE4_1__
+#include <emmintrin.h>
+#include <smmintrin.h>
+#include <tmmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+#define FORCE_INLINE_ATTR __forceinline
+#else
+#define FORCE_INLINE_ATTR
+#endif
+
+static FORCE_INLINE_ATTR inline uint64_t utf8_range_UnalignedLoad64(
+    const void* p) {
+  uint64_t t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+static FORCE_INLINE_ATTR inline int utf8_range_AsciiIsAscii(unsigned char c) {
+  return c < 128;
+}
+
+static FORCE_INLINE_ATTR inline int utf8_range_IsTrailByteOk(const char c) {
+  return (int8_t)(c) <= (int8_t)(0xBF);
+}
+
+/* If return_position is false then it returns 1 if |data| is a valid utf8
+ * sequence, otherwise returns 0.
+ * If return_position is set to true, returns the length in bytes of the prefix
+   of |data| that is all structurally valid UTF-8.
+ */
+static size_t utf8_range_ValidateUTF8Naive(const char* data, const char* end,
+                                           int return_position) {
+  /* We return err_pos in the loop which is always 0 if !return_position */
+  size_t err_pos = 0;
+  size_t codepoint_bytes = 0;
+  /* The early check is done because of early continue's on codepoints of all
+   * sizes, i.e. we first check for ascii and if it is, we call continue, then
+   * for 2 byte codepoints, etc. This is done in order to reduce indentation and
+   * improve readability of the codepoint validity check.
+   */
+  while (data + codepoint_bytes < end) {
+    if (return_position) {
+      err_pos += codepoint_bytes;
+    }
+    data += codepoint_bytes;
+    const size_t len = end - data;
+    const unsigned char byte1 = data[0];
+
+    /* We do not skip many ascii bytes at the same time as this function is
+       used for tail checking (< 16 bytes) and for non x86 platforms. We also
+       don't think that cases where non-ASCII codepoints are followed by ascii
+       happen often. For small strings it also introduces some penalty. For
+       purely ascii UTF8 strings (which is the overwhelming case) we call
+       SkipAscii function which is multiplatform and extremely fast.
+     */
+    /* [00..7F] ASCII -> 1 byte */
+    if (utf8_range_AsciiIsAscii(byte1)) {
+      codepoint_bytes = 1;
+      continue;
+    }
+    /* [C2..DF], [80..BF] -> 2 bytes */
+    if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF &&
+        utf8_range_IsTrailByteOk(data[1])) {
+      codepoint_bytes = 2;
+      continue;
+    }
+    if (len >= 3) {
+      const unsigned char byte2 = data[1];
+      const unsigned char byte3 = data[2];
+
+      /* Is byte2, byte3 between [0x80, 0xBF]
+       * Check for 0x80 was done above.
+       */
+      if (!utf8_range_IsTrailByteOk(byte2) ||
+          !utf8_range_IsTrailByteOk(byte3)) {
+        return err_pos;
+      }
+
+      if (/* E0, A0..BF, 80..BF */
+          ((byte1 == 0xE0 && byte2 >= 0xA0) ||
+           /* E1..EC, 80..BF, 80..BF */
+           (byte1 >= 0xE1 && byte1 <= 0xEC) ||
+           /* ED, 80..9F, 80..BF */
+           (byte1 == 0xED && byte2 <= 0x9F) ||
+           /* EE..EF, 80..BF, 80..BF */
+           (byte1 >= 0xEE && byte1 <= 0xEF))) {
+        codepoint_bytes = 3;
+        continue;
+      }
+      if (len >= 4) {
+        const unsigned char byte4 = data[3];
+        /* Is byte4 between 0x80 ~ 0xBF */
+        if (!utf8_range_IsTrailByteOk(byte4)) {
+          return err_pos;
+        }
+
+        if (/* F0, 90..BF, 80..BF, 80..BF */
+            ((byte1 == 0xF0 && byte2 >= 0x90) ||
+             /* F1..F3, 80..BF, 80..BF, 80..BF */
+             (byte1 >= 0xF1 && byte1 <= 0xF3) ||
+             /* F4, 80..8F, 80..BF, 80..BF */
+             (byte1 == 0xF4 && byte2 <= 0x8F))) {
+          codepoint_bytes = 4;
+          continue;
+        }
+      }
+    }
+    return err_pos;
+  }
+  if (return_position) {
+    err_pos += codepoint_bytes;
+  }
+  /* if return_position is false, this returns 1.
+   * if return_position is true, this returns err_pos.
+   */
+  return err_pos + (1 - return_position);
+}
+
+#ifdef __SSE4_1__
+/* Returns the number of bytes needed to skip backwards to get to the first
+   byte of codepoint.
+ */
+static inline int utf8_range_CodepointSkipBackwards(int32_t codepoint_word) {
+  const int8_t* const codepoint = (const int8_t*)(&codepoint_word);
+  if (!utf8_range_IsTrailByteOk(codepoint[3])) {
+    return 1;
+  } else if (!utf8_range_IsTrailByteOk(codepoint[2])) {
+    return 2;
+  } else if (!utf8_range_IsTrailByteOk(codepoint[1])) {
+    return 3;
+  }
+  return 0;
+}
+#endif  // __SSE4_1__
+
+/* Skipping over ASCII as much as possible, per 8 bytes. It is intentional
+   as most strings to check for validity consist only of 1 byte codepoints.
+ */
+static inline const char* utf8_range_SkipAscii(const char* data,
+                                               const char* end) {
+  while (8 <= end - data &&
+         (utf8_range_UnalignedLoad64(data) & 0x8080808080808080) == 0) {
+    data += 8;
+  }
+  while (data < end && utf8_range_AsciiIsAscii(*data)) {
+    ++data;
+  }
+  return data;
+}
+
+static FORCE_INLINE_ATTR inline size_t utf8_range_Validate(
+    const char* data, size_t len, int return_position) {
+  if (len == 0) return 1 - return_position;
+  const char* const end = data + len;
+  data = utf8_range_SkipAscii(data, end);
+  /* SIMD algorithm always outperforms the naive version for any data of
+     length >=16.
+   */
+  if (end - data < 16) {
+    return (return_position ? (data - (end - len)) : 0) +
+           utf8_range_ValidateUTF8Naive(data, end, return_position);
+  }
+#ifndef __SSE4_1__
+  return (return_position ? (data - (end - len)) : 0) +
+         utf8_range_ValidateUTF8Naive(data, end, return_position);
+#else
+  /* This code checks that utf-8 ranges are structurally valid 16 bytes at once
+   * using superscalar instructions.
+   * The mapping between ranges of codepoint and their corresponding utf-8
+   * sequences is below.
+   */
+
+  /*
+   * U+0000...U+007F     00...7F
+   * U+0080...U+07FF     C2...DF 80...BF
+   * U+0800...U+0FFF     E0      A0...BF 80...BF
+   * U+1000...U+CFFF     E1...EC 80...BF 80...BF
+   * U+D000...U+D7FF     ED      80...9F 80...BF
+   * U+E000...U+FFFF     EE...EF 80...BF 80...BF
+   * U+10000...U+3FFFF   F0      90...BF 80...BF 80...BF
+   * U+40000...U+FFFFF   F1...F3 80...BF 80...BF 80...BF
+   * U+100000...U+10FFFF F4      80...8F 80...BF 80...BF
+   */
+
+  /* First we compute the type for each byte, as given by the table below.
+   * This type will be used as an index later on.
+   */
+
+  /*
+   * Index  Min Max Byte Type
+   *  0     00  7F  Single byte sequence
+   *  1,2,3 80  BF  Second, third and fourth byte for many of the sequences.
+   *  4     A0  BF  Second byte after E0
+   *  5     80  9F  Second byte after ED
+   *  6     90  BF  Second byte after F0
+   *  7     80  8F  Second byte after F4
+   *  8     C2  F4  First non ASCII byte
+   *  9..15 7F  80  Invalid byte
+   */
+
+  /* After the first step we compute the index for all bytes, then we permute
+     the bytes according to their indices to check the ranges from the range
+     table.
+   * The range for a given type can be found in the range_min_table and
+     range_max_table, the range for type/index X is in range_min_table[X] ...
+     range_max_table[X].
+   */
+
+  /* Algorithm:
+   * Put index zero to all bytes.
+   * Find all non ASCII characters, give them index 8.
+   * For each tail byte in a codepoint sequence, give it an index corresponding
+     to the 1 based index from the end.
+   * If the first byte of the codepoint is in the [C0...DF] range, we write
+     index 1 in the following byte.
+   * If the first byte of the codepoint is in the range [E0...EF], we write
+     indices 2 and 1 in the next two bytes.
+   * If the first byte of the codepoint is in the range [F0...FF] we write
+     indices 3,2,1 into the next three bytes.
+   * For finding the number of bytes we need to look at high nibbles (4 bits)
+     and do the lookup from the table, it can be done with shift by 4 + shuffle
+     instructions. We call it `first_len`.
+   * Then we shift first_len by 8 bits to get the indices of the 2nd bytes.
+   * Saturating sub 1 and shift by 8 bits to get the indices of the 3rd bytes.
+   * Again to get the indices of the 4th bytes.
+   * Take OR of all that 4 values and check within range.
+   */
+  /* For example:
+   * input       C3 80 68 E2 80 20 A6 F0 A0 80 AC 20 F0 93 80 80
+   * first_len   1  0  0  2  0  0  0  3  0  0  0  0  3  0  0  0
+   * 1st byte    8  0  0  8  0  0  0  8  0  0  0  0  8  0  0  0
+   * 2nd byte    0  1  0  0  2  0  0  0  3  0  0  0  0  3  0  0 // Shift + sub
+   * 3rd byte    0  0  0  0  0  1  0  0  0  2  0  0  0  0  2  0 // Shift + sub
+   * 4th byte    0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1 // Shift + sub
+   * Index       8  1  0  8  2  1  0  8  3  2  1  0  8  3  2  1 // OR of results
+   */
+
+  /* Checking for errors:
+   * Error checking is done by looking up the high nibble (4 bits) of each byte
+     against an error checking table.
+   * Because the lookup value for the second byte depends of the value of the
+     first byte in codepoint, we use saturated operations to adjust the index.
+   * Specifically we need to add 2 for E0, 3 for ED, 3 for F0 and 4 for F4 to
+     match the correct index.
+       * If we subtract from all bytes EF then EO -> 241, ED -> 254, F0 -> 1,
+         F4 -> 5
+       * Do saturating sub 240, then E0 -> 1, ED -> 14 and we can do lookup to
+         match the adjustment
+       * Add saturating 112, then F0 -> 113, F4 -> 117, all that were > 16 will
+         be more 128 and lookup in ef_fe_table will return 0 but for F0
+         and F4 it will be 4 and 5 accordingly
+   */
+  /*
+   * Then just check the appropriate ranges with greater/smaller equal
+     instructions. Check tail with a naive algorithm.
+   * To save from previous 16 byte checks we just align previous_first_len to
+     get correct continuations of the codepoints.
+   */
+
+  /*
+   * Map high nibble of "First Byte" to legal character length minus 1
+   * 0x00 ~ 0xBF --> 0
+   * 0xC0 ~ 0xDF --> 1
+   * 0xE0 ~ 0xEF --> 2
+   * 0xF0 ~ 0xFF --> 3
+   */
+  const __m128i first_len_table =
+      _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3);
+
+  /* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */
+  const __m128i first_range_table =
+      _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8);
+
+  /*
+   * Range table, map range index to min and max values
+   */
+  const __m128i range_min_table =
+      _mm_setr_epi8(0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F,
+                    0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
+
+  const __m128i range_max_table =
+      _mm_setr_epi8(0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80,
+                    0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
+
+  /*
+   * Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after
+   * which the Second Byte are not 80~BF. It contains "range index adjustment".
+   * +------------+---------------+------------------+----------------+
+   * | First Byte | original range| range adjustment | adjusted range |
+   * +------------+---------------+------------------+----------------+
+   * | E0         | 2             | 2                | 4              |
+   * +------------+---------------+------------------+----------------+
+   * | ED         | 2             | 3                | 5              |
+   * +------------+---------------+------------------+----------------+
+   * | F0         | 3             | 3                | 6              |
+   * +------------+---------------+------------------+----------------+
+   * | F4         | 4             | 4                | 8              |
+   * +------------+---------------+------------------+----------------+
+   */
+
+  /* df_ee_table[1] -> E0, df_ee_table[14] -> ED as ED - E0 = 13 */
+  // The values represent the adjustment in the Range Index table for a correct
+  // index.
+  const __m128i df_ee_table =
+      _mm_setr_epi8(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0);
+
+  /* ef_fe_table[1] -> F0, ef_fe_table[5] -> F4, F4 - F0 = 4 */
+  // The values represent the adjustment in the Range Index table for a correct
+  // index.
+  const __m128i ef_fe_table =
+      _mm_setr_epi8(0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+  __m128i prev_input = _mm_set1_epi8(0);
+  __m128i prev_first_len = _mm_set1_epi8(0);
+  __m128i error = _mm_set1_epi8(0);
+  while (end - data >= 16) {
+    const __m128i input =
+        _mm_loadu_si128((const __m128i*)(data));
+
+    /* high_nibbles = input >> 4 */
+    const __m128i high_nibbles =
+        _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0F));
+
+    /* first_len = legal character length minus 1 */
+    /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
+    /* first_len = first_len_table[high_nibbles] */
+    __m128i first_len = _mm_shuffle_epi8(first_len_table, high_nibbles);
+
+    /* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
+    /* range = first_range_table[high_nibbles] */
+    __m128i range = _mm_shuffle_epi8(first_range_table, high_nibbles);
+
+    /* Second Byte: set range index to first_len */
+    /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
+    /* range |= (first_len, prev_first_len) << 1 byte */
+    range = _mm_or_si128(range, _mm_alignr_epi8(first_len, prev_first_len, 15));
+
+    /* Third Byte: set range index to saturate_sub(first_len, 1) */
+    /* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
+    __m128i tmp1;
+    __m128i tmp2;
+    /* tmp1 = saturate_sub(first_len, 1) */
+    tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(1));
+    /* tmp2 = saturate_sub(prev_first_len, 1) */
+    tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(1));
+    /* range |= (tmp1, tmp2) << 2 bytes */
+    range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 14));
+
+    /* Fourth Byte: set range index to saturate_sub(first_len, 2) */
+    /* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */
+    /* tmp1 = saturate_sub(first_len, 2) */
+    tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(2));
+    /* tmp2 = saturate_sub(prev_first_len, 2) */
+    tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(2));
+    /* range |= (tmp1, tmp2) << 3 bytes */
+    range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 13));
+
+    /*
+     * Now we have below range indices calculated
+     * Correct cases:
+     * - 8 for C0~FF
+     * - 3 for 1st byte after F0~FF
+     * - 2 for 1st byte after E0~EF or 2nd byte after F0~FF
+     * - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or
+     *         3rd byte after F0~FF
+     * - 0 for others
+     * Error cases:
+     *   >9 for non ascii First Byte overlapping
+     *   E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error
+     */
+
+    /* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */
+    /* Overlaps lead to index 9~15, which are illegal in range table */
+    __m128i shift1;
+    __m128i pos;
+    __m128i range2;
+    /* shift1 = (input, prev_input) << 1 byte */
+    shift1 = _mm_alignr_epi8(input, prev_input, 15);
+    pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
+    /*
+     * shift1:  | EF  F0 ... FE | FF  00  ... ...  DE | DF  E0 ... EE |
+     * pos:     | 0   1      15 | 16  17           239| 240 241    255|
+     * pos-240: | 0   0      0  | 0   0            0  | 0   1      15 |
+     * pos+112: | 112 113    127|       >= 128        |     >= 128    |
+     */
+    tmp1 = _mm_subs_epu8(pos, _mm_set1_epi8(-16));
+    range2 = _mm_shuffle_epi8(df_ee_table, tmp1);
+    tmp2 = _mm_adds_epu8(pos, _mm_set1_epi8(112));
+    range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_table, tmp2));
+
+    range = _mm_add_epi8(range, range2);
+
+    /* Load min and max values per calculated range index */
+    __m128i min_range = _mm_shuffle_epi8(range_min_table, range);
+    __m128i max_range = _mm_shuffle_epi8(range_max_table, range);
+
+    /* Check value range */
+    if (return_position) {
+      error = _mm_cmplt_epi8(input, min_range);
+      error = _mm_or_si128(error, _mm_cmpgt_epi8(input, max_range));
+      /* 5% performance drop from this conditional branch */
+      if (!_mm_testz_si128(error, error)) {
+        break;
+      }
+    } else {
+      error = _mm_or_si128(error, _mm_cmplt_epi8(input, min_range));
+      error = _mm_or_si128(error, _mm_cmpgt_epi8(input, max_range));
+    }
+
+    prev_input = input;
+    prev_first_len = first_len;
+
+    data += 16;
+  }
+  /* If we got to the end, we don't need to skip any bytes backwards */
+  if (return_position && (data - (end - len)) == 0) {
+    return utf8_range_ValidateUTF8Naive(data, end, return_position);
+  }
+  /* Find previous codepoint (not 80~BF) */
+  data -= utf8_range_CodepointSkipBackwards(_mm_extract_epi32(prev_input, 3));
+  if (return_position) {
+    return (data - (end - len)) +
+           utf8_range_ValidateUTF8Naive(data, end, return_position);
+  }
+  /* Test if there was any error */
+  if (!_mm_testz_si128(error, error)) {
+    return 0;
+  }
+  /* Check the tail */
+  return utf8_range_ValidateUTF8Naive(data, end, return_position);
+#endif
+}
+
+int utf8_range_IsValid(const char* data, size_t len) {
+  return utf8_range_Validate(data, len, /*return_position=*/0) != 0;
+}
+
+size_t utf8_range_ValidPrefix(const char* data, size_t len) {
+  return utf8_range_Validate(data, len, /*return_position=*/1);
+}
diff --git a/third_party/utf8_range/utf8_range.h b/third_party/utf8_range/utf8_range.h
index 24d5c77d2fdc..d7c232616022 100644
--- a/third_party/utf8_range/utf8_range.h
+++ b/third_party/utf8_range/utf8_range.h
@@ -1,18 +1,19 @@
 #ifndef THIRD_PARTY_UTF8_RANGE_UTF8_RANGE_H_
 #define THIRD_PARTY_UTF8_RANGE_UTF8_RANGE_H_
 
+#include <stddef.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#if (defined(__ARM_NEON) && defined(__aarch64__)) || defined(__SSE4_1__)
-int utf8_range2(const unsigned char* data, int len);
-#else
-int utf8_naive(const unsigned char* data, int len);
-static inline int utf8_range2(const unsigned char* data, int len) {
-  return utf8_naive(data, len);
-}
-#endif
+// Returns 1 if the sequence of characters is a valid UTF-8 sequence, otherwise
+// 0.
+int utf8_range_IsValid(const char* data, size_t len);
+
+// Returns the length in bytes of the prefix of str that is all
+// structurally valid UTF-8.
+size_t utf8_range_ValidPrefix(const char* data, size_t len);
 
 #ifdef __cplusplus
 }  // extern "C"
diff --git a/third_party/utf8_range/utf8_validity.cc b/third_party/utf8_range/utf8_validity.cc
index 9e945766732d..4f4574e3f698 100644
--- a/third_party/utf8_range/utf8_validity.cc
+++ b/third_party/utf8_range/utf8_validity.cc
@@ -15,446 +15,22 @@
  *
  * For API see the utf8_validity.h header.
  */
+
 #include "utf8_validity.h"
 
 #include <cstddef>
-#include <cstdint>
 
-#include "absl/strings/ascii.h"
 #include "absl/strings/string_view.h"
-
-#ifdef __SSE4_1__
-#include <emmintrin.h>
-#include <smmintrin.h>
-#include <tmmintrin.h>
-#endif
+#include "utf8_range.h"
 
 namespace utf8_range {
-namespace {
-
-inline uint64_t UNALIGNED_LOAD64(const void* p) {
-  uint64_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline bool TrailByteOk(const char c) {
-  return static_cast<int8_t>(c) <= static_cast<int8_t>(0xBF);
-}
-
-/* If ReturnPosition is false then it returns 1 if |data| is a valid utf8
- * sequence, otherwise returns 0.
- * If ReturnPosition is set to true, returns the length in bytes of the prefix
-   of |data| that is all structurally valid UTF-8.
- */
-template <bool ReturnPosition>
-size_t ValidUTF8Span(const char* data, const char* end) {
-  /* We return err_pos in the loop which is always 0 if !ReturnPosition */
-  size_t err_pos = 0;
-  size_t codepoint_bytes = 0;
-  /* The early check is done because of early continue's on codepoints of all
-   * sizes, i.e. we first check for ascii and if it is, we call continue, then
-   * for 2 byte codepoints, etc. This is done in order to reduce indentation and
-   * improve readability of the codepoint validity check.
-   */
-  while (data + codepoint_bytes < end) {
-    if (ReturnPosition) {
-      err_pos += codepoint_bytes;
-    }
-    data += codepoint_bytes;
-    const size_t len = end - data;
-    const unsigned char byte1 = data[0];
-
-    /* We do not skip many ascii bytes at the same time as this function is
-       used for tail checking (< 16 bytes) and for non x86 platforms. We also
-       don't think that cases where non-ASCII codepoints are followed by ascii
-       happen often. For small strings it also introduces some penalty. For
-       purely ascii UTF8 strings (which is the overwhelming case) we call
-       SkipAscii function which is multiplatform and extremely fast.
-     */
-    /* [00..7F] ASCII -> 1 byte */
-    if (absl::ascii_isascii(byte1)) {
-      codepoint_bytes = 1;
-      continue;
-    }
-    /* [C2..DF], [80..BF] -> 2 bytes */
-    if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && TrailByteOk(data[1])) {
-      codepoint_bytes = 2;
-      continue;
-    }
-    if (len >= 3) {
-      const unsigned char byte2 = data[1];
-      const unsigned char byte3 = data[2];
-
-      /* Is byte2, byte3 between [0x80, 0xBF]
-       * Check for 0x80 was done above.
-       */
-      if (!TrailByteOk(byte2) || !TrailByteOk(byte3)) {
-        return err_pos;
-      }
-
-      if (/* E0, A0..BF, 80..BF */
-          ((byte1 == 0xE0 && byte2 >= 0xA0) ||
-           /* E1..EC, 80..BF, 80..BF */
-           (byte1 >= 0xE1 && byte1 <= 0xEC) ||
-           /* ED, 80..9F, 80..BF */
-           (byte1 == 0xED && byte2 <= 0x9F) ||
-           /* EE..EF, 80..BF, 80..BF */
-           (byte1 >= 0xEE && byte1 <= 0xEF))) {
-        codepoint_bytes = 3;
-        continue;
-      }
-      if (len >= 4) {
-        const unsigned char byte4 = data[3];
-        /* Is byte4 between 0x80 ~ 0xBF */
-        if (!TrailByteOk(byte4)) {
-          return err_pos;
-        }
-
-        if (/* F0, 90..BF, 80..BF, 80..BF */
-            ((byte1 == 0xF0 && byte2 >= 0x90) ||
-             /* F1..F3, 80..BF, 80..BF, 80..BF */
-             (byte1 >= 0xF1 && byte1 <= 0xF3) ||
-             /* F4, 80..8F, 80..BF, 80..BF */
-             (byte1 == 0xF4 && byte2 <= 0x8F))) {
-          codepoint_bytes = 4;
-          continue;
-        }
-      }
-    }
-    return err_pos;
-  }
-  if (ReturnPosition) {
-    err_pos += codepoint_bytes;
-  }
-  /* if ReturnPosition is false, this returns 1.
-   * if ReturnPosition is true, this returns err_pos.
-   */
-  return err_pos + (1 - ReturnPosition);
-}
-
-#ifdef __SSE4_1__
-/* Returns the number of bytes needed to skip backwards to get to the first
-   byte of codepoint.
- */
-inline int CodepointSkipBackwards(int32_t codepoint_word) {
-  const int8_t* const codepoint =
-      reinterpret_cast<const int8_t*>(&codepoint_word);
-  if (!TrailByteOk(codepoint[3])) {
-    return 1;
-  } else if (!TrailByteOk(codepoint[2])) {
-    return 2;
-  } else if (!TrailByteOk(codepoint[1])) {
-    return 3;
-  }
-  return 0;
-}
-#endif  // __SSE4_1__
-
-/* Skipping over ASCII as much as possible, per 8 bytes. It is intentional
-   as most strings to check for validity consist only of 1 byte codepoints.
- */
-inline const char* SkipAscii(const char* data, const char* end) {
-  while (8 <= end - data &&
-         (UNALIGNED_LOAD64(data) & 0x8080808080808080) == 0) {
-    data += 8;
-  }
-  while (data < end && absl::ascii_isascii(*data)) {
-    ++data;
-  }
-  return data;
-}
-
-template <bool ReturnPosition>
-size_t ValidUTF8(const char* data, size_t len) {
-  if (len == 0) return 1 - ReturnPosition;
-  const char* const end = data + len;
-  data = SkipAscii(data, end);
-  /* SIMD algorithm always outperforms the naive version for any data of
-     length >=16.
-   */
-  if (end - data < 16) {
-    return (ReturnPosition ? (data - (end - len)) : 0) +
-           ValidUTF8Span<ReturnPosition>(data, end);
-  }
-#ifndef __SSE4_1__
-  return (ReturnPosition ? (data - (end - len)) : 0) +
-         ValidUTF8Span<ReturnPosition>(data, end);
-#else
-  /* This code checks that utf-8 ranges are structurally valid 16 bytes at once
-   * using superscalar instructions.
-   * The mapping between ranges of codepoint and their corresponding utf-8
-   * sequences is below.
-   */
-
-  /*
-   * U+0000...U+007F     00...7F
-   * U+0080...U+07FF     C2...DF 80...BF
-   * U+0800...U+0FFF     E0      A0...BF 80...BF
-   * U+1000...U+CFFF     E1...EC 80...BF 80...BF
-   * U+D000...U+D7FF     ED      80...9F 80...BF
-   * U+E000...U+FFFF     EE...EF 80...BF 80...BF
-   * U+10000...U+3FFFF   F0      90...BF 80...BF 80...BF
-   * U+40000...U+FFFFF   F1...F3 80...BF 80...BF 80...BF
-   * U+100000...U+10FFFF F4      80...8F 80...BF 80...BF
-   */
-
-  /* First we compute the type for each byte, as given by the table below.
-   * This type will be used as an index later on.
-   */
-
-  /*
-   * Index  Min Max Byte Type
-   *  0     00  7F  Single byte sequence
-   *  1,2,3 80  BF  Second, third and fourth byte for many of the sequences.
-   *  4     A0  BF  Second byte after E0
-   *  5     80  9F  Second byte after ED
-   *  6     90  BF  Second byte after F0
-   *  7     80  8F  Second byte after F4
-   *  8     C2  F4  First non ASCII byte
-   *  9..15 7F  80  Invalid byte
-   */
-
-  /* After the first step we compute the index for all bytes, then we permute
-     the bytes according to their indices to check the ranges from the range
-     table.
-   * The range for a given type can be found in the range_min_table and
-     range_max_table, the range for type/index X is in range_min_table[X] ...
-     range_max_table[X].
-   */
-
-  /* Algorithm:
-   * Put index zero to all bytes.
-   * Find all non ASCII characters, give them index 8.
-   * For each tail byte in a codepoint sequence, give it an index corresponding
-     to the 1 based index from the end.
-   * If the first byte of the codepoint is in the [C0...DF] range, we write
-     index 1 in the following byte.
-   * If the first byte of the codepoint is in the range [E0...EF], we write
-     indices 2 and 1 in the next two bytes.
-   * If the first byte of the codepoint is in the range [F0...FF] we write
-     indices 3,2,1 into the next three bytes.
-   * For finding the number of bytes we need to look at high nibbles (4 bits)
-     and do the lookup from the table, it can be done with shift by 4 + shuffle
-     instructions. We call it `first_len`.
-   * Then we shift first_len by 8 bits to get the indices of the 2nd bytes.
-   * Saturating sub 1 and shift by 8 bits to get the indices of the 3rd bytes.
-   * Again to get the indices of the 4th bytes.
-   * Take OR of all that 4 values and check within range.
-   */
-  /* For example:
-   * input       C3 80 68 E2 80 20 A6 F0 A0 80 AC 20 F0 93 80 80
-   * first_len   1  0  0  2  0  0  0  3  0  0  0  0  3  0  0  0
-   * 1st byte    8  0  0  8  0  0  0  8  0  0  0  0  8  0  0  0
-   * 2nd byte    0  1  0  0  2  0  0  0  3  0  0  0  0  3  0  0 // Shift + sub
-   * 3rd byte    0  0  0  0  0  1  0  0  0  2  0  0  0  0  2  0 // Shift + sub
-   * 4th byte    0  0  0  0  0  0  0  0  0  0  1  0  0  0  0  1 // Shift + sub
-   * Index       8  1  0  8  2  1  0  8  3  2  1  0  8  3  2  1 // OR of results
-   */
-
-  /* Checking for errors:
-   * Error checking is done by looking up the high nibble (4 bits) of each byte
-     against an error checking table.
-   * Because the lookup value for the second byte depends of the value of the
-     first byte in codepoint, we use saturated operations to adjust the index.
-   * Specifically we need to add 2 for E0, 3 for ED, 3 for F0 and 4 for F4 to
-     match the correct index.
-       * If we subtract from all bytes EF then EO -> 241, ED -> 254, F0 -> 1,
-         F4 -> 5
-       * Do saturating sub 240, then E0 -> 1, ED -> 14 and we can do lookup to
-         match the adjustment
-       * Add saturating 112, then F0 -> 113, F4 -> 117, all that were > 16 will
-         be more 128 and lookup in ef_fe_table will return 0 but for F0
-         and F4 it will be 4 and 5 accordingly
-   */
-  /*
-   * Then just check the appropriate ranges with greater/smaller equal
-     instructions. Check tail with a naive algorithm.
-   * To save from previous 16 byte checks we just align previous_first_len to
-     get correct continuations of the codepoints.
-   */
-
-  /*
-   * Map high nibble of "First Byte" to legal character length minus 1
-   * 0x00 ~ 0xBF --> 0
-   * 0xC0 ~ 0xDF --> 1
-   * 0xE0 ~ 0xEF --> 2
-   * 0xF0 ~ 0xFF --> 3
-   */
-  const __m128i first_len_table =
-      _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3);
-
-  /* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */
-  const __m128i first_range_table =
-      _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8);
-
-  /*
-   * Range table, map range index to min and max values
-   */
-  const __m128i range_min_table =
-      _mm_setr_epi8(0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F,
-                    0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
-
-  const __m128i range_max_table =
-      _mm_setr_epi8(0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80,
-                    0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
-
-  /*
-   * Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after
-   * which the Second Byte are not 80~BF. It contains "range index adjustment".
-   * +------------+---------------+------------------+----------------+
-   * | First Byte | original range| range adjustment | adjusted range |
-   * +------------+---------------+------------------+----------------+
-   * | E0         | 2             | 2                | 4              |
-   * +------------+---------------+------------------+----------------+
-   * | ED         | 2             | 3                | 5              |
-   * +------------+---------------+------------------+----------------+
-   * | F0         | 3             | 3                | 6              |
-   * +------------+---------------+------------------+----------------+
-   * | F4         | 4             | 4                | 8              |
-   * +------------+---------------+------------------+----------------+
-   */
-
-  /* df_ee_table[1] -> E0, df_ee_table[14] -> ED as ED - E0 = 13 */
-  // The values represent the adjustment in the Range Index table for a correct
-  // index.
-  const __m128i df_ee_table =
-      _mm_setr_epi8(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0);
-
-  /* ef_fe_table[1] -> F0, ef_fe_table[5] -> F4, F4 - F0 = 4 */
-  // The values represent the adjustment in the Range Index table for a correct
-  // index.
-  const __m128i ef_fe_table =
-      _mm_setr_epi8(0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-
-  __m128i prev_input = _mm_set1_epi8(0);
-  __m128i prev_first_len = _mm_set1_epi8(0);
-  __m128i error = _mm_set1_epi8(0);
-  while (end - data >= 16) {
-    const __m128i input =
-        _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));
-
-    /* high_nibbles = input >> 4 */
-    const __m128i high_nibbles =
-        _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0F));
-
-    /* first_len = legal character length minus 1 */
-    /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
-    /* first_len = first_len_table[high_nibbles] */
-    __m128i first_len = _mm_shuffle_epi8(first_len_table, high_nibbles);
-
-    /* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
-    /* range = first_range_table[high_nibbles] */
-    __m128i range = _mm_shuffle_epi8(first_range_table, high_nibbles);
-
-    /* Second Byte: set range index to first_len */
-    /* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
-    /* range |= (first_len, prev_first_len) << 1 byte */
-    range = _mm_or_si128(range, _mm_alignr_epi8(first_len, prev_first_len, 15));
-
-    /* Third Byte: set range index to saturate_sub(first_len, 1) */
-    /* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
-    __m128i tmp1;
-    __m128i tmp2;
-    /* tmp1 = saturate_sub(first_len, 1) */
-    tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(1));
-    /* tmp2 = saturate_sub(prev_first_len, 1) */
-    tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(1));
-    /* range |= (tmp1, tmp2) << 2 bytes */
-    range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 14));
-
-    /* Fourth Byte: set range index to saturate_sub(first_len, 2) */
-    /* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */
-    /* tmp1 = saturate_sub(first_len, 2) */
-    tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(2));
-    /* tmp2 = saturate_sub(prev_first_len, 2) */
-    tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(2));
-    /* range |= (tmp1, tmp2) << 3 bytes */
-    range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 13));
-
-    /*
-     * Now we have below range indices calculated
-     * Correct cases:
-     * - 8 for C0~FF
-     * - 3 for 1st byte after F0~FF
-     * - 2 for 1st byte after E0~EF or 2nd byte after F0~FF
-     * - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or
-     *         3rd byte after F0~FF
-     * - 0 for others
-     * Error cases:
-     *   >9 for non ascii First Byte overlapping
-     *   E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error
-     */
-
-    /* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */
-    /* Overlaps lead to index 9~15, which are illegal in range table */
-    __m128i shift1;
-    __m128i pos;
-    __m128i range2;
-    /* shift1 = (input, prev_input) << 1 byte */
-    shift1 = _mm_alignr_epi8(input, prev_input, 15);
-    pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
-    /*
-     * shift1:  | EF  F0 ... FE | FF  00  ... ...  DE | DF  E0 ... EE |
-     * pos:     | 0   1      15 | 16  17           239| 240 241    255|
-     * pos-240: | 0   0      0  | 0   0            0  | 0   1      15 |
-     * pos+112: | 112 113    127|       >= 128        |     >= 128    |
-     */
-    tmp1 = _mm_subs_epu8(pos, _mm_set1_epi8(-16));
-    range2 = _mm_shuffle_epi8(df_ee_table, tmp1);
-    tmp2 = _mm_adds_epu8(pos, _mm_set1_epi8(112));
-    range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_table, tmp2));
-
-    range = _mm_add_epi8(range, range2);
-
-    /* Load min and max values per calculated range index */
-    __m128i min_range = _mm_shuffle_epi8(range_min_table, range);
-    __m128i max_range = _mm_shuffle_epi8(range_max_table, range);
-
-    /* Check value range */
-    if (ReturnPosition) {
-      error = _mm_cmplt_epi8(input, min_range);
-      error = _mm_or_si128(error, _mm_cmpgt_epi8(input, max_range));
-      /* 5% performance drop from this conditional branch */
-      if (!_mm_testz_si128(error, error)) {
-        break;
-      }
-    } else {
-      error = _mm_or_si128(error, _mm_cmplt_epi8(input, min_range));
-      error = _mm_or_si128(error, _mm_cmpgt_epi8(input, max_range));
-    }
-
-    prev_input = input;
-    prev_first_len = first_len;
-
-    data += 16;
-  }
-  /* If we got to the end, we don't need to skip any bytes backwards */
-  if (ReturnPosition && (data - (end - len)) == 0) {
-    return ValidUTF8Span<true>(data, end);
-  }
-  /* Find previous codepoint (not 80~BF) */
-  data -= CodepointSkipBackwards(_mm_extract_epi32(prev_input, 3));
-  if (ReturnPosition) {
-    return (data - (end - len)) + ValidUTF8Span<true>(data, end);
-  }
-  /* Test if there was any error */
-  if (!_mm_testz_si128(error, error)) {
-    return 0;
-  }
-  /* Check the tail */
-  return ValidUTF8Span<false>(data, end);
-#endif
-}
-
-}  // namespace
 
 bool IsStructurallyValid(absl::string_view str) {
-  return ValidUTF8</*ReturnPosition=*/false>(str.data(), str.size());
+  return utf8_range_IsValid(str.data(), str.size());
 }
 
 size_t SpanStructurallyValid(absl::string_view str) {
-  return ValidUTF8</*ReturnPosition=*/true>(str.data(), str.size());
+  return utf8_range_ValidPrefix(str.data(), str.size());
 }
 
 }  // namespace utf8_range
diff --git a/third_party/utf8_range/utf8_validity.h b/third_party/utf8_range/utf8_validity.h
index 4a8d75b3b46d..1f251d0fec0a 100644
--- a/third_party/utf8_range/utf8_validity.h
+++ b/third_party/utf8_range/utf8_validity.h
@@ -7,6 +7,8 @@
 #ifndef THIRD_PARTY_UTF8_RANGE_UTF8_VALIDITY_H_
 #define THIRD_PARTY_UTF8_RANGE_UTF8_VALIDITY_H_
 
+#include <cstddef>
+
 #include "absl/strings/string_view.h"
 
 namespace utf8_range {
diff --git a/upb/wire/internal/decode.h b/upb/wire/internal/decode.h
index 23648cd07266..f36b2b7e97d3 100644
--- a/upb/wire/internal/decode.h
+++ b/upb/wire/internal/decode.h
@@ -56,26 +56,7 @@ extern const uint8_t upb_utf8_offsets[];
 
 UPB_INLINE
 bool _upb_Decoder_VerifyUtf8Inline(const char* ptr, int len) {
-  const char* end = ptr + len;
-
-  // Check 8 bytes at a time for any non-ASCII char.
-  while (end - ptr >= 8) {
-    uint64_t data;
-    memcpy(&data, ptr, 8);
-    if (data & 0x8080808080808080) goto non_ascii;
-    ptr += 8;
-  }
-
-  // Check one byte at a time for non-ASCII.
-  while (ptr < end) {
-    if (*ptr & 0x80) goto non_ascii;
-    ptr++;
-  }
-
-  return true;
-
-non_ascii:
-  return utf8_range2((const unsigned char*)ptr, end - ptr) == 0;
+  return utf8_range_IsValid(ptr, len);
 }
 
 const char* _upb_Decoder_CheckRequired(upb_Decoder* d, const char* ptr,