From b910b5d62bee1f07af910cf9156a8f97ade86702 Mon Sep 17 00:00:00 2001
From: qumuq-til <43707841+qumuq-til@users.noreply.github.com>
Date: Fri, 4 Jun 2021 15:44:31 +0300
Subject: [PATCH] Fixes to make filters work with Unicode text

Putting an end to [A-Z] hegemony one fix at a time
---
Review notes (amendments made on top of the original submission):
 * titleize(): the $uppercase argument was silently ignored; 'first' once
   again capitalises only the first character of the result.
 * humanize(): the $uppercase argument was silently ignored and a trailing
   "_id" was no longer removed; both behaviours are restored.
 * Every mb_* call now passes 'UTF-8' explicitly so the result does not
   depend on mb_internal_encoding().
 * Redundant $strlen locals removed (mb_substr with a null length reads to
   the end of the string).
 * Hunk headers and the diffstat were recomputed for the amended hunks. The
   post-image blob id on the "index" line is still the one from the original
   submission and should be regenerated when the patch is re-exported.

 system/src/Grav/Common/Inflector.php | 85 ++++++++++++++++++++--------
 1 file changed, 62 insertions(+), 23 deletions(-)

diff --git a/system/src/Grav/Common/Inflector.php b/system/src/Grav/Common/Inflector.php
index 50c218f1b..306aa0034 100644
--- a/system/src/Grav/Common/Inflector.php
+++ b/system/src/Grav/Common/Inflector.php
@@ -161,9 +161,22 @@ public static function singularize($word, $count = 1)
      */
     public static function titleize($word, $uppercase = '')
     {
-        $uppercase = $uppercase === 'first' ? 'ucfirst' : 'ucwords';
-
-        return $uppercase(static::humanize(static::underscorize($word)));
+        $replacement = preg_replace('/(\p{Lu}\p{Ll})/u', ' \1', $word); /* Me -> " Me" (leading space is trimmed below) */
+        $replacement = preg_replace('/(\p{Ll})(\p{Lu})/u', '\1 \2', $replacement); /* eM -> e M */
+        $replacement = preg_replace('/(\p{N})(\p{L})/u', '\1 \2', $replacement); /* 1a -> 1 a (any case) */
+        $replacement = preg_replace('/(\p{L})(\p{N})/u', '\1 \2', $replacement); /* a1 -> a 1 (any case) */
+        $replacement = preg_replace('/[^\p{L}\p{N}]/u', ' ', $replacement); /* if not a letter or a number replace with a space */
+        $replacement = preg_replace('/( )\1+/', ' ', $replacement); /* remove repeating spaces */
+        $replacement = trim($replacement, ' ');
+
+        /* 'first' capitalises only the first character, as the pre-Unicode implementation did */
+        if ($uppercase === 'first') {
+            $replacement = mb_strtolower($replacement, 'UTF-8');
+
+            return mb_strtoupper(mb_substr($replacement, 0, 1, 'UTF-8'), 'UTF-8') . mb_substr($replacement, 1, null, 'UTF-8');
+        }
+
+        return mb_convert_case($replacement, MB_CASE_TITLE, 'UTF-8'); /* title case words */
     }
 
     /**
@@ -180,7 +193,11 @@ public static function titleize($word, $uppercase = '')
      */
     public static function camelize($word)
     {
-        return str_replace(' ', '', ucwords(preg_replace('/[^A-Z^a-z^0-9]+/', ' ', $word)));
+        $replacement = preg_replace('/[^\p{L}\p{N}]+/u', ' ', $word); /* replace every non-alphanumeric character with a space */
+        $replacement = mb_convert_case($replacement, MB_CASE_TITLE, 'UTF-8'); /* title case words */
+        $replacement = str_replace(' ', '', $replacement); /* remove spaces */
+
+        return $replacement;
     }
 
     /**
@@ -196,11 +213,16 @@ public static function camelize($word)
      */
     public static function underscorize($word)
     {
-        $regex1 = preg_replace('/([A-Z]+)([A-Z][a-z])/', '\1_\2', $word);
-        $regex2 = preg_replace('/([a-zd])([A-Z])/', '\1_\2', $regex1);
-        $regex3 = preg_replace('/[^A-Z^a-z^0-9]+/', '_', $regex2);
-
-        return strtolower($regex3);
+        $replacement = preg_replace('/(\p{Lu}\p{Ll})/u', '_\1', $word); /* Me -> _Me (leading '_' is trimmed below) */
+        $replacement = preg_replace('/(\p{Ll})(\p{Lu})/u', '\1_\2', $replacement); /* eM -> e_M */
+        $replacement = preg_replace('/(\p{N})(\p{L})/u', '\1_\2', $replacement); /* 1a -> 1_a (any case) */
+        $replacement = preg_replace('/(\p{L})(\p{N})/u', '\1_\2', $replacement); /* a1 -> a_1 (any case) */
+        $replacement = preg_replace('/[^\p{L}\p{N}]/u', '_', $replacement); /* if not a letter or a number replace with a '_' */
+        $replacement = preg_replace('/(_)\1+/', '_', $replacement); /* remove repeating '_'s */
+        $replacement = trim($replacement, '_');
+        $replacement = mb_strtolower($replacement, 'UTF-8'); /* explicit encoding: do not rely on mb_internal_encoding() */
+
+        return $replacement;
     }
 
     /**
@@ -216,14 +238,16 @@ public static function underscorize($word)
      */
     public static function hyphenize($word)
     {
-        $regex1 = preg_replace('/([A-Z]+)([A-Z][a-z])/', '\1-\2', $word);
-        $regex2 = preg_replace('/([a-z])([A-Z])/', '\1-\2', $regex1);
-        $regex3 = preg_replace('/([0-9])([A-Z])/', '\1-\2', $regex2);
-        $regex4 = preg_replace('/[^A-Z^a-z^0-9]+/', '-', $regex3);
-
-        $regex4 = trim($regex4, '-');
-
-        return strtolower($regex4);
+        $replacement = preg_replace('/(\p{Lu}\p{Ll})/u', '-\1', $word); /* Me -> -Me (leading '-' is trimmed below) */
+        $replacement = preg_replace('/(\p{Ll})(\p{Lu})/u', '\1-\2', $replacement); /* eM -> e-M */
+        $replacement = preg_replace('/(\p{N})(\p{L})/u', '\1-\2', $replacement); /* 1a -> 1-a (any case) */
+        $replacement = preg_replace('/(\p{L})(\p{N})/u', '\1-\2', $replacement); /* a1 -> a-1 (any case) */
+        $replacement = preg_replace('/[^\p{L}\p{N}]/u', '-', $replacement); /* if not a letter or a number replace with a '-' */
+        $replacement = preg_replace('/(-)\1+/', '-', $replacement); /* remove repeating '-'s */
+        $replacement = trim($replacement, '-');
+        $replacement = mb_strtolower($replacement, 'UTF-8'); /* explicit encoding: do not rely on mb_internal_encoding() */
+
+        return $replacement;
     }
 
     /**
@@ -244,12 +268,24 @@ public static function hyphenize($word)
      */
     public static function humanize($word, $uppercase = '')
     {
-        $uppercase = $uppercase === 'all' ? 'ucwords' : 'ucfirst';
-
-        return $uppercase(str_replace('_', ' ', preg_replace('/_id$/', '', $word)));
+        $replacement = preg_replace('/_id$/', '', $word); /* drop a trailing "_id", as the pre-Unicode implementation did */
+        $replacement = preg_replace('/[^\p{L}\p{N}]/u', ' ', $replacement); /* if not a letter or a number replace with a space */
+
+        /* 'all' capitalises every word, as the pre-Unicode implementation did */
+        if ($uppercase === 'all') {
+            return mb_convert_case($replacement, MB_CASE_TITLE, 'UTF-8');
+        }
+
+        $firstChar = mb_substr($replacement, 0, 1, 'UTF-8');
+        $then = mb_strtolower(mb_substr($replacement, 1, null, 'UTF-8'), 'UTF-8');
+
+        return mb_strtoupper($firstChar, 'UTF-8') . $then;
     }
 
     /**
+     *
+     * WARNING: This function is currently not in use in Twig filters and is a candidate for removal
+     *
      * Same as camelize but first char is underscored
      *
      * Converts a word like "send_email" to "sendEmail". It
@@ -263,9 +299,12 @@ public static function humanize($word, $uppercase = '')
      */
     public static function variablize($word)
     {
-        $word = static::camelize($word);
-
-        return strtolower($word[0]) . substr($word, 1);
+        $replacement = static::camelize($word);
+
+        $firstChar = mb_substr($replacement, 0, 1, 'UTF-8'); /* safe on an empty string, unlike $word[0] */
+        $then = mb_substr($replacement, 1, null, 'UTF-8');
+
+        return mb_strtolower($firstChar, 'UTF-8') . $then;
     }
 
     /**