"""
Translates the Spanish words queried from Wikidata to all other Scribe languages.

The words are read from ``words_to_translate.json`` next to this script. If
``../formatted_data/translated_words.json`` already exists, its contents are
passed along as the starting ``translations`` dictionary.

Example
-------
    python3 src/scribe_data/extract_transform/languages/Spanish/translations/translate_words.py
"""

# Provenance: added in commit 13a79afd4f29d67a495684fcb2bc1cebf78dadd0
# ("Created translate_words.py for spanish", henrikth93, 2024-03-23).

import json
import os
import sys

translate_script_dir = os.path.dirname(os.path.abspath(__file__))

# Locate ``src`` relative to this file instead of searching ``sys.path[0]`` for
# a folder literally named "Scribe-Data", so the import below also works from a
# renamed clone or fork and regardless of how the script is launched.
# translations -> Spanish -> languages -> extract_transform -> scribe_data -> src
PATH_TO_SCRIBE_DATA_SRC = os.path.abspath(
    os.path.join(translate_script_dir, *([os.pardir] * 5))
)
# Directory containing the repository checkout, with a trailing separator
# (same meaning as before; kept so the module still exposes this name).
PATH_TO_SCRIBE_ORG = os.path.join(
    os.path.dirname(os.path.dirname(PATH_TO_SCRIBE_DATA_SRC)), ""
)
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

from scribe_data.utils import translate_to_other_languages  # noqa: E402

SRC_LANG = "Spanish"
words_to_translate_path = os.path.join(translate_script_dir, "words_to_translate.json")

with open(words_to_translate_path, "r", encoding="utf-8") as file:
    json_data = json.load(file)

# Each entry is expected to be a mapping with a "word" key.
word_list = [item["word"] for item in json_data]

translated_words_path = os.path.join(
    translate_script_dir, "../formatted_data/translated_words.json"
)
# Start from the existing translations file when there is one; otherwise begin
# with an empty mapping.
try:
    with open(translated_words_path, "r", encoding="utf-8") as file:
        translations = json.load(file)
except FileNotFoundError:
    translations = {}

translate_to_other_languages(
    source_language=SRC_LANG,
    word_list=word_list,
    translations=translations,
    batch_size=100,
)