Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix:standardize_lang #281

Merged
merged 3 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions ovos_utils/lang/__init__.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,46 @@
from os import listdir
from os.path import isdir, join

from langcodes import tag_distance, standardize_tag as std
from ovos_utils.file_utils import resolve_resource_file


def standardize_lang_tag(lang_code, macro=True):
    """Normalize the casing/format of a language tag (e.g. "en-us" -> "en-US").

    Delegates to ``langcodes.standardize_tag`` when that call succeeds, see
    https://langcodes-hickford.readthedocs.io/en/sphinx/index.html

    If langcodes cannot be imported or rejects the tag, a plain string
    fallback is used instead.

    Args:
        lang_code (str): language tag such as "en", "en-us" or "zh-hans-cn"
        macro (bool): forwarded to ``langcodes.standardize_tag``. In the
            fallback path, True returns only the lower-cased primary
            language subtag (everything before the first "-").

    Returns:
        str: the standardized language tag
    """
    try:
        from langcodes import standardize_tag as std
        return std(lang_code, macro=macro)
    except Exception:
        # Best-effort fallback; deliberately broad so a missing dependency
        # or a tag langcodes refuses still yields a usable value, without
        # trapping KeyboardInterrupt/SystemExit like a bare except would.
        primary, *subtags = lang_code.split("-")
        if macro or not subtags:
            return primary.lower()
        # BCP-47 casing convention: 4-letter script subtags are Title-case,
        # everything else (regions, variants) is upper-cased here. Handling
        # every subtag avoids the unpacking error on codes like "zh-hans-cn".
        normalized = [
            sub.title() if len(sub) == 4 and sub.isalpha() else sub.upper()
            for sub in subtags
        ]
        return "-".join([primary.lower(), *normalized])


def get_language_dir(base_path, lang="en-US"):
    """Return the sub-directory of ``base_path`` that best matches ``lang``.

    Every directory directly under ``base_path`` is treated as a candidate
    language code and scored against the standardized ``lang`` with
    ``langcodes.tag_distance``; the closest one wins.

    Args:
        base_path (str): directory containing one folder per language
        lang (str): requested language tag

    Returns:
        str: path of the closest matching language directory, or
            ``join(base_path, lang)`` (which may not exist) when
            ``base_path`` is not a directory or nothing is close enough.
    """
    fallback = join(base_path, lang)
    if not isdir(base_path):
        # nothing to scan; listdir would raise on a missing directory
        return fallback

    target = standardize_lang_tag(lang)

    candidates = []
    for name in listdir(base_path):
        path = join(base_path, name)
        if not isdir(path):
            continue
        try:
            score = tag_distance(target, name)
        except Exception:  # folder name is not a valid language code
            continue
        # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values
        # 0 -> These codes represent the same language, possibly after filling in values and normalizing.
        # 1- 3 -> These codes indicate a minor regional difference.
        # 4 - 10 -> These codes indicate a significant but unproblematic regional difference.
        if score < 10:
            candidates.append((path, score))

    if not candidates:
        return fallback
    # closest distance to the target lang code wins; min() keeps the first
    # entry on ties, same as taking element 0 of a stable sort
    best_path, _ = min(candidates, key=lambda k: k[1])
    return best_path


def translate_word(name, lang='en-us'):
def translate_word(name, lang='en-US'):
""" Helper to get word translations
Args:
name (str): Word name. Returned as the default value if not translated
Expand Down
1 change: 0 additions & 1 deletion requirements/extras.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,3 @@ ovos-plugin-manager>=0.0.25,<1.0.0
ovos-config>=0.0.12,<1.0.0
ovos-workshop>=0.0.13,<1.0.0
ovos_bus_client>=0.0.8,<1.0.0
langcodes
3 changes: 2 additions & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ pyee>=8.0.0
combo-lock~=0.2
rich-click~=1.7
rich~=13.7
orjson
orjson
langcodes
4 changes: 2 additions & 2 deletions test/unittests/test_event_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ def f(message):

# Schedule a repeating event
es.schedule_repeating_event(f, None, 10, name='f')
self.assertTrue(len(es.bus.ee._events['id:f']) == 1)
self.assertTrue(len(es.bus.ee._events.get('id:f', [])) == 1)

es.shutdown()
# Check that the reference to the function has been removed from the
# bus emitter
self.assertTrue(len(bus._events['id:f']) == 0)
self.assertTrue(len(bus._events.get('id:f', [])) == 0)
Loading