From 7a9b30279fe7db4b4e23a932c0f1f4025e60f33c Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 00:52:25 +0100 Subject: [PATCH 1/3] fix:standardize_lang --- ovos_utils/lang/__init__.py | 50 ++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/ovos_utils/lang/__init__.py b/ovos_utils/lang/__init__.py index 38951b2..439d30b 100644 --- a/ovos_utils/lang/__init__.py +++ b/ovos_utils/lang/__init__.py @@ -1,46 +1,46 @@ from os import listdir from os.path import isdir, join - +from langcodes import tag_distance, standardize_tag as std from ovos_utils.file_utils import resolve_resource_file def standardize_lang_tag(lang_code, macro=True): """https://langcodes-hickford.readthedocs.io/en/sphinx/index.html""" try: - from langcodes import standardize_tag as std return std(lang_code, macro=macro) except: if macro: return lang_code.split("-")[0].lower() + if "-" in lang_code: + a, b = lang_code.split("-", 2) + return f"{a.lower()}-{b.upper()}" return lang_code.lower() -def get_language_dir(base_path, lang="en-us"): +def get_language_dir(base_path, lang="en-US"): """ checks for all language variations and returns best path """ - lang_path = join(base_path, lang) - # base_path/en-us - if isdir(lang_path): - return lang_path - if "-" in lang: - main = lang.split("-")[0] - # base_path/en - general_lang_path = join(base_path, main) - if isdir(general_lang_path): - return general_lang_path - else: - main = lang - # base_path/en-uk, base_path/en-au... - if isdir(base_path): - candidates = [join(base_path, f) - for f in listdir(base_path) if f.startswith(main)] - paths = [p for p in candidates if isdir(p)] - # TODO how to choose best local dialect? - if len(paths): - return paths[0] - return join(base_path, lang) + lang = standardize_lang_tag(lang) + + candidates = [] + for f in listdir(base_path): + if isdir(f"{base_path}/{f}"): + try: + score = tag_distance(lang, f) + except: # not a valid language code + continue + # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values + # 0 -> These codes represent the same language, possibly after filling in values and normalizing. + # 1- 3 -> These codes indicate a minor regional difference. + # 4 - 10 -> These codes indicate a significant but unproblematic regional difference. + if score < 10: + candidates.append((f, score)) + + # sort by distance to target lang code + candidates = sorted(candidates, key=lambda k: k[1]) + return candidates[0] -def translate_word(name, lang='en-us'): +def translate_word(name, lang='en-US'): """ Helper to get word translations Args: name (str): Word name. Returned as the default value if not translated From e14b97e5db9a96dfb2fce4501b9e515b44476bf4 Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 00:53:45 +0100 Subject: [PATCH 2/3] fix:standardize_lang --- requirements/extras.txt | 1 - requirements/requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/extras.txt b/requirements/extras.txt index d7a5396..671233b 100644 --- a/requirements/extras.txt +++ b/requirements/extras.txt @@ -3,4 +3,3 @@ ovos-plugin-manager>=0.0.25,<1.0.0 ovos-config>=0.0.12,<1.0.0 ovos-workshop>=0.0.13,<1.0.0 ovos_bus_client>=0.0.8,<1.0.0 -langcodes diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 53510a8..02981b2 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -7,4 +7,5 @@ pyee>=8.0.0 combo-lock~=0.2 rich-click~=1.7 rich~=13.7 -orjson \ No newline at end of file +orjson +langcodes \ No newline at end of file From dad5f1d8079ed647dc4a00c9988c9a7fdac0b11b Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 01:05:55 +0100 Subject: [PATCH 3/3] fix:standardize_lang --- test/unittests/test_event_scheduler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unittests/test_event_scheduler.py b/test/unittests/test_event_scheduler.py index 49b11d7..5f2dcd7 100644 --- a/test/unittests/test_event_scheduler.py +++ b/test/unittests/test_event_scheduler.py @@ -115,9 +115,9 @@ def f(message): # Schedule a repeating event es.schedule_repeating_event(f, None, 10, name='f') - self.assertTrue(len(es.bus.ee._events['id:f']) == 1) + self.assertTrue(len(es.bus.ee._events.get('id:f', [])) == 1) es.shutdown() # Check that the reference to the function has been removed from the # bus emitter - self.assertTrue(len(bus._events['id:f']) == 0) + self.assertTrue(len(bus._events.get('id:f', [])) == 0)