Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move language_choose_code() into new LangChooser class #1081

Merged
merged 9 commits into from
Oct 14, 2024
168 changes: 9 additions & 159 deletions include/langchooser.inc
Original file line number Diff line number Diff line change
Expand Up @@ -28,170 +28,20 @@

*/

use phpweb\LangChooser;

require_once __DIR__ . '/../src/autoload.php';

// Default STRIPPED_URI
$_SERVER['STRIPPED_URI'] = htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8');

// The code is encapsulated in a function,
// so the variable namespace is not polluted
list($LANG, $EXPL_LANG, $UA_LANGS) = language_choose_code();
list($LANG, $EXPL_LANG) = (new LangChooser($LANGUAGES, $INACTIVE_ONLINE_LANGUAGES, myphpnet_language(), default_language() ?: ''))->chooseCode(
$_REQUEST['lang'] ?? null,
$_SERVER['REQUEST_URI'],
$_SERVER['HTTP_ACCEPT_LANGUAGE'] ?? null,
);

// Compatibility
if ($EXPL_LANG == '') { unset($EXPL_LANG); }

function language_choose_code()
{
// Contains all the languages picked up by the
// process in priority order (without repeating codes)
$languages = [];

// Default values for languages
$explicitly_specified = ''; $selected = '';

// Specified for the request (GET/POST parameter)
if (!empty($_REQUEST['lang']) && is_string($_REQUEST['lang'])) {
$explicitly_specified = language_add(htmlspecialchars($_REQUEST['lang'], ENT_QUOTES, 'UTF-8'), $languages);
}

// Specified in a shortcut URL (eg. /en/echo or /pt_br/echo)
if (preg_match("!^/(\\w{2}(_\\w{2})?)/!", htmlspecialchars($_SERVER['REQUEST_URI'],ENT_QUOTES, 'UTF-8'), $flang)) {

// Put language into preference list
$rlang = language_add($flang[1], $languages);

// Set explicity specified language
if (empty($explicitly_specified)) {
$explicitly_specified = $rlang;
}

// Drop out langauge specification from URL, as this is already handled
$_SERVER['STRIPPED_URI'] = preg_replace(
"!^/$flang[1]/!", "/", htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8'),
);

}

// Specified in a manual URL (eg. manual/en/ or manual/pt_br/)
if (preg_match("!^/manual/(\\w{2}(_\\w{2})?)(/|$)!", htmlspecialchars($_SERVER['REQUEST_URI'], ENT_QUOTES, 'UTF-8'), $flang)) {

$flang = language_add($flang[1], $languages);

// Set explicity specified language
if (empty($explicitly_specified)) {
$explicitly_specified = $flang;
}
}

// Honor the users own language setting (if available)
if (myphpnet_language()) {
language_add(myphpnet_language(), $languages);
}

// Specified by the user via the browser's Accept Language setting
// Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
$browser_langs = []; $parsed_langs = [];

// Check if we have $_SERVER['HTTP_ACCEPT_LANGUAGE'] set and
// it no longer breaks if you only have one language set :)
if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
$browser_accept = explode(",", $_SERVER['HTTP_ACCEPT_LANGUAGE']);

// Go through all language preference specs
foreach ($browser_accept as $value) {
// The language part is either a code or a code with a quality
// We cannot do anything with a * code, so it is skipped
// If the quality is missing, it is assumed to be 1 according to the RFC
if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($value), $found)) {
$quality = (isset($found[3]) ? (float) $found[3] : 1.0);
$browser_langs[] = [$found[1], $quality];
}
unset($found);
}
}

// Order the codes by quality
usort($browser_langs, "language_accept_order");

// For all languages found in the accept-language
foreach ($browser_langs as $langdata) {

// Translation table for accept-language codes and phpdoc codes
switch ($langdata[0]) {
case "pt-br":
$langdata[0] = 'pt_br';
break;
case "zh-cn":
$langdata[0] = 'zh';
break;
case "zh-hk":
$langdata[0] = 'hk';
break;
case "zh-tw":
$langdata[0] = 'tw';
break;
}

// We do not support flavors of languages (except the ones above)
// This is not in conformance to the RFC, but it here for user
// convinience reasons
if (preg_match("!^(.+)-!", $langdata[0], $match)) {
$langdata[0] = $match[1];
}

// Add language to priority order
$parsed_langs[] = language_add($langdata[0], $languages);
}

// Language preferred by this mirror site
language_add(default_language(), $languages);

// Last default language is English
language_add("en", $languages);

// Try to find out what language is available on this mirror.
// As most of the language dependant operations involve manual
// page display (lookup, search, shortcuts), we will check for
// the index file of manuals.
/*
foreach ($languages as $language) {
if (file_exists($_SERVER['DOCUMENT_ROOT'] . "/manual/$language/index.php")) {
$selected = $language;
break;
}
}
*/
$selected = $languages[0];

// Return with all found data
return [$selected, $explicitly_specified, $parsed_langs];
}

// Add a language to the possible languages' list
function language_add($langcode, &$langs)
{
global $LANGUAGES, $INACTIVE_ONLINE_LANGUAGES;

// Make language code lowercase, html encode special chars and remove slashes
$langcode = strtolower(htmlspecialchars($langcode));

// The Brazilian Portuguese code needs special attention
if ($langcode == 'pt_br') { $langcode = 'pt_BR'; }

// Append language code in priority order if it is not
// there already and supported by the PHP site. Try to
// lower number of file_exists() calls to the minumum...
if (!in_array($langcode, $langs, false) && isset($LANGUAGES[$langcode])
&& !isset($INACTIVE_ONLINE_LANGUAGES[$langcode])) {
$langs[] = $langcode;
}

// Return with language code
return $langcode;
}

// Order the array of compiled
// accept-language codes by quality value
function language_accept_order($a, $b)
{
if ($a[1] == $b[1]) { return 0; }
return ($a[1] > $b[1]) ? -1 : 1;
}
168 changes: 168 additions & 0 deletions src/LangChooser.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
<?php

namespace phpweb;

class LangChooser
{
private readonly string $preferredLanguage;

private readonly string $defaultLanguage;

/**
* @param array<string, string> $availableLanguages
* @param array<string, string> $inactiveLanguages
*/
public function __construct(
private readonly array $availableLanguages,
private readonly array $inactiveLanguages,
string $preferredLanguage,
string $defaultLanguage,
)
{
$this->defaultLanguage = $this->normalize($defaultLanguage);
$this->preferredLanguage = $this->normalize($preferredLanguage);
}

/**
* @return array{string, string}
*/
public function chooseCode(
string|array|null $langParam,
string $requestUri,
?string $acceptLanguageHeader,
): array
{
// Default values for languages
$explicitly_specified = '';

// Specified for the request (GET/POST parameter)
if (is_string($langParam)) {
$langCode = $this->normalize(htmlspecialchars($langParam, ENT_QUOTES, 'UTF-8'));
$explicitly_specified = $langCode;
if ($this->isAvailableLanguage($langCode)) {
return [$langCode, $explicitly_specified];
}
}

// Specified in a shortcut URL (eg. /en/echo or /pt_br/echo)
if (preg_match("!^/(\\w{2}(_\\w{2})?)/!", htmlspecialchars($requestUri,ENT_QUOTES, 'UTF-8'), $flang)) {
// Put language into preference list
$rlang = $this->normalize($flang[1]);

// Set explicitly specified language
if (empty($explicitly_specified)) {
$explicitly_specified = $rlang;
}

// Drop out language specification from URL, as this is already handled
$_SERVER['STRIPPED_URI'] = preg_replace(
"!^/$flang[1]/!", "/", htmlspecialchars($requestUri, ENT_QUOTES, 'UTF-8'),
);

if ($this->isAvailableLanguage($rlang)) {
return [$rlang, $explicitly_specified];
}
}

// Specified in a manual URL (eg. manual/en/ or manual/pt_br/)
if (preg_match("!^/manual/(\\w{2}(_\\w{2})?)(/|$)!", htmlspecialchars($requestUri, ENT_QUOTES, 'UTF-8'), $flang)) {
$flang = $this->normalize($flang[1]);

// Set explicitly specified language
if (empty($explicitly_specified)) {
$explicitly_specified = $flang;
}

if ($this->isAvailableLanguage($flang)) {
return [$flang, $explicitly_specified];
}
}

// Honor the users own language setting (if available)
if ($this->isAvailableLanguage($this->preferredLanguage)) {
return [$this->preferredLanguage, $explicitly_specified];
}

// Specified by the user via the browser's Accept Language setting
// Samples: "hu, en-us;q=0.66, en;q=0.33", "hu,en-us;q=0.5"
$browser_langs = [];

// Check if we have $_SERVER['HTTP_ACCEPT_LANGUAGE'] set and
// it no longer breaks if you only have one language set :)
if (isset($acceptLanguageHeader)) {
$browser_accept = explode(",", $acceptLanguageHeader);

// Go through all language preference specs
foreach ($browser_accept as $value) {
// The language part is either a code or a code with a quality
// We cannot do anything with a * code, so it is skipped
// If the quality is missing, it is assumed to be 1 according to the RFC
if (preg_match("!([a-z-]+)(;q=([0-9\\.]+))?!", trim($value), $found)) {
$quality = (isset($found[3]) ? (float) $found[3] : 1.0);
$browser_langs[] = [$found[1], $quality];
}
unset($found);
}
}

// Order the codes by quality
usort($browser_langs, fn ($a, $b) => $b[1] <=> $a[1]);

// For all languages found in the accept-language
foreach ($browser_langs as $langdata) {

// Translation table for accept-language codes and phpdoc codes
switch ($langdata[0]) {
case "pt-br":
$langdata[0] = 'pt_br';
break;
case "zh-cn":
$langdata[0] = 'zh';
break;
case "zh-hk":
$langdata[0] = 'hk';
break;
case "zh-tw":
$langdata[0] = 'tw';
break;
}

// We do not support flavors of languages (except the ones above)
// This is not in conformance to the RFC, but it here for user
// convenience reasons
if (preg_match("!^(.+)-!", $langdata[0], $match)) {
$langdata[0] = $match[1];
}

$lang = $this->normalize($langdata[0]);
if ($this->isAvailableLanguage($lang)) {
return [$lang, $explicitly_specified];
}
}

// Language preferred by this mirror site
if ($this->isAvailableLanguage($this->defaultLanguage)) {
return [$this->defaultLanguage, $explicitly_specified];
}

// Last default language is English
return ["en", $explicitly_specified];
}

private function normalize(string $langCode): string
{
// Make language code lowercase, html encode special chars and remove slashes
$langCode = strtolower(htmlspecialchars($langCode));

// The Brazilian Portuguese code needs special attention
if ($langCode == 'pt_br') {
return 'pt_BR';
}
return $langCode;
}

private function isAvailableLanguage(string $langCode): bool
{
return isset($this->availableLanguages[$langCode]) && !isset($this->inactiveLanguages[$langCode]);
}
}
Loading
Loading