Skip to content

Commit

Permalink
Make the utf8_range implementation just in C
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 590961088
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Dec 14, 2023
1 parent 1db8ed4 commit 9c7d2b9
Show file tree
Hide file tree
Showing 13 changed files with 499 additions and 477 deletions.
2 changes: 1 addition & 1 deletion php/ext/google/protobuf/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if test "$PHP_PROTOBUF" != "no"; then

PHP_NEW_EXTENSION(
protobuf,
- arena.c array.c convert.c def.c map.c message.c names.c php-upb.c protobuf.c third_party/utf8_range/naive.c third_party/utf8_range/range2-neon.c third_party/utf8_range/range2-sse.c,
+ arena.c array.c convert.c def.c map.c message.c names.c php-upb.c protobuf.c third_party/utf8_range/utf8_range.c,
$ext_shared, , -std=gnu99 -I@ext_srcdir@/third_party/utf8_range)
PHP_ADD_BUILD_DIR($ext_builddir/third_party/utf8_range)

Expand Down
2 changes: 1 addition & 1 deletion python/convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
// Use the object's bytes if they are valid UTF-8.
char* ptr;
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
- if (utf8_range2((const unsigned char*)ptr, size) != 0) {
+ if (!utf8_range_IsValid(ptr, size)) {
// Invalid UTF-8. Try to convert the message to a Python Unicode
// object, even though we know this will fail, just to get the
// idiomatic Python error message.
Expand Down
4 changes: 1 addition & 3 deletions ruby/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ pkg/
tmp/
tests/google/
ext/google/protobuf_c/third_party/utf8_range/utf8_range.h
- ext/google/protobuf_c/third_party/utf8_range/range2-sse.c
- ext/google/protobuf_c/third_party/utf8_range/range2-neon.c
- ext/google/protobuf_c/third_party/utf8_range/naive.c
+ ext/google/protobuf_c/third_party/utf8_range/utf8_range.c
ext/google/protobuf_c/third_party/utf8_range/LICENSE
lib/google/protobuf/*_pb.rb
2 changes: 1 addition & 1 deletion ruby/Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ task :copy_third_party do
# We need utf8_range in-tree.
utf8_root = '../third_party/utf8_range'
%w[
- utf8_range.h naive.c range2-neon.c range2-neon.c range2-sse.c LICENSE
+ utf8_range.h utf8_range.c LICENSE
].each do |file|
FileUtils.cp File.join(utf8_root, file),
"ext/google/protobuf_c/third_party/utf8_range"
Expand Down
2 changes: 1 addition & 1 deletion ruby/ext/google/protobuf_c/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

$srcs = ["protobuf.c", "convert.c", "defs.c", "message.c",
"repeated_field.c", "map.c", "ruby-upb.c", "wrap_memcpy.c",
- "naive.c", "range2-neon.c", "range2-sse.c", "shared_convert.c",
+ "utf8_range.c", "shared_convert.c",
"shared_message.c"]

create_makefile(ext_name)
4 changes: 1 addition & 3 deletions ruby/lib/google/tasks/ffi.rake
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,7 @@ begin
FFI::Compiler::CompileTask.new 'protobuf_c_ffi' do |c|
configure_common_compile_task c
# Ruby UPB was already compiled with different flags.
- c.exclude << "/range2-neon.c"
- c.exclude << "/range2-sse.c"
- c.exclude << "/naive.c"
+ c.exclude << "/utf8_range.c"
c.exclude << "/ruby-upb.c"
end

Expand Down
15 changes: 8 additions & 7 deletions third_party/utf8_range/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ exports_files([
filegroup(
name = "utf8_range_srcs",
srcs = [
- "naive.c",
- "range2-neon.c",
- "range2-sse.c",
+ "utf8_range.c",
"utf8_range.h",
],
visibility = ["//:__subpackages__"],
Expand All @@ -34,9 +32,7 @@ filegroup(
cc_library(
name = "utf8_range",
srcs = [
- "naive.c",
- "range2-neon.c",
- "range2-sse.c",
+ "utf8_range.c",
],
hdrs = ["utf8_range.h"],
strip_include_prefix = "/third_party/utf8_range",
Expand All @@ -48,14 +44,19 @@ cc_library(
hdrs = ["utf8_validity.h"],
strip_include_prefix = "/third_party/utf8_range",
deps = [
+ ":utf8_range",
"@com_google_absl//absl/strings",
],
)

cc_test(
name = "utf8_validity_test",
- srcs = ["utf8_validity_test.cc"],
+ srcs = [
+ "utf8_range.c",
+ "utf8_validity_test.cc",
+ ],
deps = [
+ ":utf8_range",
":utf8_validity",
"@com_google_absl//absl/strings",
"@com_google_googletest//:gtest_main",
Expand Down
6 changes: 2 additions & 4 deletions third_party/utf8_range/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,12 @@ option (utf8_range_ENABLE_INSTALL "Configure installation" ON)
##
# Create the lightweight C library
add_library (utf8_range STATIC
- naive.c
- range2-neon.c
- range2-sse.c
+ utf8_range.c
)

##
# A heavier-weight C++ wrapper that supports Abseil.
- add_library (utf8_validity STATIC utf8_validity.cc)
+ add_library (utf8_validity STATIC utf8_validity.cc utf8_range.c)

# Load Abseil dependency.
if (NOT TARGET absl::strings)
Expand Down
Loading

0 comments on commit 9c7d2b9

Please sign in to comment.