Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc++] Implement a libc++ private version of isascii #122361

Merged
merged 2 commits into from
Jan 10, 2025

Conversation

ldionne
Copy link
Member

@ldionne ldionne commented Jan 9, 2025

The isascii() function is not standard, so we should avoid relying on the platform providing it, especially since it's easy to implement in libc++ portably.

@ldionne ldionne requested a review from a team as a code owner January 9, 2025 20:28
@llvmbot llvmbot added the libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi. label Jan 9, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 9, 2025

@llvm/pr-subscribers-libcxx

Author: Louis Dionne (ldionne)

Changes

The isascii() function is not standard, so we should avoid relying on the platform providing it, especially since it's easy to implement in libc++ portably.


Full diff: https://github.com/llvm/llvm-project/pull/122361.diff

2 Files Affected:

  • (modified) libcxx/include/__locale (+6-4)
  • (modified) libcxx/src/locale.cpp (+33-33)
diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 94dc8a08437bfe..fb433a147056e4 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -516,6 +516,8 @@ protected:
 };
 #endif // _LIBCPP_HAS_WIDE_CHARACTERS
 
+inline _LIBCPP_HIDE_FROM_ABI int __isascii(int __c) { return (__c & ~0x7F) == 0; }
+
 template <>
 class _LIBCPP_EXPORTED_FROM_ABI ctype<char> : public locale::facet, public ctype_base {
   const mask* __tab_;
@@ -527,25 +529,25 @@ public:
   explicit ctype(const mask* __tab = nullptr, bool __del = false, size_t __refs = 0);
 
   _LIBCPP_HIDE_FROM_ABI bool is(mask __m, char_type __c) const {
-    return isascii(__c) ? (__tab_[static_cast<int>(__c)] & __m) != 0 : false;
+    return std::__isascii(__c) ? (__tab_[static_cast<int>(__c)] & __m) != 0 : false;
   }
 
   _LIBCPP_HIDE_FROM_ABI const char_type* is(const char_type* __low, const char_type* __high, mask* __vec) const {
     for (; __low != __high; ++__low, ++__vec)
-      *__vec = isascii(*__low) ? __tab_[static_cast<int>(*__low)] : 0;
+      *__vec = std::__isascii(*__low) ? __tab_[static_cast<int>(*__low)] : 0;
     return __low;
   }
 
   _LIBCPP_HIDE_FROM_ABI const char_type* scan_is(mask __m, const char_type* __low, const char_type* __high) const {
     for (; __low != __high; ++__low)
-      if (isascii(*__low) && (__tab_[static_cast<int>(*__low)] & __m))
+      if (std::__isascii(*__low) && (__tab_[static_cast<int>(*__low)] & __m))
         break;
     return __low;
   }
 
   _LIBCPP_HIDE_FROM_ABI const char_type* scan_not(mask __m, const char_type* __low, const char_type* __high) const {
     for (; __low != __high; ++__low)
-      if (!isascii(*__low) || !(__tab_[static_cast<int>(*__low)] & __m))
+      if (!std::__isascii(*__low) || !(__tab_[static_cast<int>(*__low)] & __m))
         break;
     return __low;
   }
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index 599c61ca7a36da..ed1e84645fb609 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -707,69 +707,69 @@ constinit locale::id ctype<wchar_t>::id;
 ctype<wchar_t>::~ctype() {}
 
 bool ctype<wchar_t>::do_is(mask m, char_type c) const {
-  return isascii(c) ? (ctype<char>::classic_table()[c] & m) != 0 : false;
+  return std::__isascii(c) ? (ctype<char>::classic_table()[c] & m) != 0 : false;
 }
 
 const wchar_t* ctype<wchar_t>::do_is(const char_type* low, const char_type* high, mask* vec) const {
   for (; low != high; ++low, ++vec)
-    *vec = static_cast<mask>(isascii(*low) ? ctype<char>::classic_table()[*low] : 0);
+    *vec = static_cast<mask>(std::__isascii(*low) ? ctype<char>::classic_table()[*low] : 0);
   return low;
 }
 
 const wchar_t* ctype<wchar_t>::do_scan_is(mask m, const char_type* low, const char_type* high) const {
   for (; low != high; ++low)
-    if (isascii(*low) && (ctype<char>::classic_table()[*low] & m))
+    if (std::__isascii(*low) && (ctype<char>::classic_table()[*low] & m))
       break;
   return low;
 }
 
 const wchar_t* ctype<wchar_t>::do_scan_not(mask m, const char_type* low, const char_type* high) const {
   for (; low != high; ++low)
-    if (!(isascii(*low) && (ctype<char>::classic_table()[*low] & m)))
+    if (!(std::__isascii(*low) && (ctype<char>::classic_table()[*low] & m)))
       break;
   return low;
 }
 
 wchar_t ctype<wchar_t>::do_toupper(char_type c) const {
 #  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
+  return std::__isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
 #  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-  return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
+  return std::__isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
 #  else
-  return (isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c;
+  return (std::__isascii(c) && __locale::__iswlower(c, _LIBCPP_GET_C_LOCALE)) ? c - L'a' + L'A' : c;
 #  endif
 }
 
 const wchar_t* ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
 #  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
+    *low = std::__isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
 #  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-    *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low;
+    *low = std::__isascii(*low) ? ctype<char>::__classic_upper_table()[*low] : *low;
 #  else
-    *low = (isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low;
+    *low = (std::__isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? (*low - L'a' + L'A') : *low;
 #  endif
   return low;
 }
 
 wchar_t ctype<wchar_t>::do_tolower(char_type c) const {
 #  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
+  return std::__isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
 #  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-  return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
+  return std::__isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
 #  else
-  return (isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c;
+  return (std::__isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - L'A' + 'a' : c;
 #  endif
 }
 
 const wchar_t* ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
 #  ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
+    *low = std::__isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
 #  elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) || defined(__MVS__)
-    *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low;
+    *low = std::__isascii(*low) ? ctype<char>::__classic_lower_table()[*low] : *low;
 #  else
-    *low = (isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low;
+    *low = (std::__isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - L'A' + L'a' : *low;
 #  endif
   return low;
 }
@@ -783,14 +783,14 @@ const char* ctype<wchar_t>::do_widen(const char* low, const char* high, char_typ
 }
 
 char ctype<wchar_t>::do_narrow(char_type c, char dfault) const {
-  if (isascii(c))
+  if (std::__isascii(c))
     return static_cast<char>(c);
   return dfault;
 }
 
 const wchar_t* ctype<wchar_t>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const {
   for (; low != high; ++low, ++dest)
-    if (isascii(*low))
+    if (std::__isascii(*low))
       *dest = static_cast<char>(*low);
     else
       *dest = dfault;
@@ -816,52 +816,52 @@ ctype<char>::~ctype() {
 
 char ctype<char>::do_toupper(char_type c) const {
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
+  return std::__isascii(c) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
   return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-  return isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
+  return std::__isascii(c) ? static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
 #else
-  return (isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c;
+  return (std::__isascii(c) && __locale::__islower(c, _LIBCPP_GET_C_LOCALE)) ? c - 'a' + 'A' : c;
 #endif
 }
 
 const char* ctype<char>::do_toupper(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
+    *low = std::__isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
     *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-    *low = isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
+    *low = std::__isascii(*low) ? static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
 #else
-    *low = (isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low;
+    *low = (std::__isascii(*low) && __locale::__islower(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'a' + 'A' : *low;
 #endif
   return low;
 }
 
 char ctype<char>::do_tolower(char_type c) const {
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-  return isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
+  return std::__isascii(c) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
   return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-  return isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
+  return std::__isascii(c) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
 #else
-  return (isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c;
+  return (std::__isascii(c) && __locale::__isupper(c, _LIBCPP_GET_C_LOCALE)) ? c - 'A' + 'a' : c;
 #endif
 }
 
 const char* ctype<char>::do_tolower(char_type* low, const char_type* high) const {
   for (; low != high; ++low)
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
-    *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
+    *low = std::__isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
     *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
-    *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
+    *low = std::__isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
 #else
-    *low = (isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low;
+    *low = (std::__isascii(*low) && __locale::__isupper(*low, _LIBCPP_GET_C_LOCALE)) ? *low - 'A' + 'a' : *low;
 #endif
   return low;
 }
@@ -875,14 +875,14 @@ const char* ctype<char>::do_widen(const char* low, const char* high, char_type*
 }
 
 char ctype<char>::do_narrow(char_type c, char dfault) const {
-  if (isascii(c))
+  if (std::__isascii(c))
     return static_cast<char>(c);
   return dfault;
 }
 
 const char* ctype<char>::do_narrow(const char_type* low, const char_type* high, char dfault, char* dest) const {
   for (; low != high; ++low, ++dest)
-    if (isascii(*low))
+    if (std::__isascii(*low))
       *dest = *low;
     else
       *dest = dfault;
@@ -1140,7 +1140,7 @@ bool ctype_byname<wchar_t>::do_is(mask m, char_type c) const {
 
 const wchar_t* ctype_byname<wchar_t>::do_is(const char_type* low, const char_type* high, mask* vec) const {
   for (; low != high; ++low, ++vec) {
-    if (isascii(*low))
+    if (std::__isascii(*low))
       *vec = static_cast<mask>(ctype<char>::classic_table()[*low]);
     else {
       *vec      = 0;

@ldionne ldionne force-pushed the review/locale-3-isascii branch from e7e8c71 to a4bdba5 Compare January 9, 2025 20:36
The isascii() function is not standard, so we should avoid relying on
the platform providing it, especially since it's easy to implement in
libc++ portably.
@ldionne ldionne force-pushed the review/locale-3-isascii branch from a4bdba5 to 9f095b5 Compare January 9, 2025 21:04
libcxx/include/__locale Outdated Show resolved Hide resolved
@ldionne ldionne added the pending-ci Merging the PR is only pending completion of CI label Jan 10, 2025
@ldionne ldionne merged commit 4c6ca3e into llvm:main Jan 10, 2025
77 checks passed
@ldionne ldionne deleted the review/locale-3-isascii branch January 10, 2025 20:20
BaiXilin pushed a commit to BaiXilin/llvm-fix-vnni-instr-types that referenced this pull request Jan 12, 2025
The isascii() function is not standard, so we should avoid relying on
the platform providing it, especially since it's easy to implement in
libc++ portably.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
libc++ libc++ C++ Standard Library. Not GNU libstdc++. Not libc++abi. pending-ci Merging the PR is only pending completion of CI
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants