Skip to content

Commit

Permalink
Add Script for German Translations (#117)
Browse files Browse the repository at this point in the history
* add script

* uploading translated words

* more german words

* added more words

* update script

* update words

* update to pass ruff check

* uploading more

* removing my 4.4 MB baby, which I watched grow for the past seven and a half hours :'(
  • Loading branch information
mhmohona authored Mar 21, 2024
1 parent f6f593d commit 400a0c3
Showing 1 changed file with 40 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""
Translates the German words queried from Wikidata to all other Scribe languages.
Example
-------
python3 src/scribe_data/extract_transform/languages/German/translations/translate_words.py
"""

import json
import os
import sys

# Make the in-repo ``scribe_data`` package importable when this file is run
# directly as a script: keep everything in the path before "Scribe-Data" and
# point at that checkout's ``src`` directory.
PATH_TO_SCRIBE_ORG = os.path.dirname(sys.path[0]).split("Scribe-Data")[0]
PATH_TO_SCRIBE_DATA_SRC = f"{PATH_TO_SCRIBE_ORG}Scribe-Data/src"
sys.path.insert(0, PATH_TO_SCRIBE_DATA_SRC)

# This import must stay below the ``sys.path`` change above. Importing it at
# the top of the file would run before the path is extended, so the script
# would fail with ModuleNotFoundError unless the package happened to be
# installed already.
from scribe_data.utils import translate_to_other_languages  # noqa: E402

SRC_LANG = "German"

# All data files are resolved relative to this script, not the current working
# directory, so the script can be launched from anywhere.
translate_script_dir = os.path.dirname(os.path.abspath(__file__))
words_to_translate_path = os.path.join(translate_script_dir, "words_to_translate.json")

with open(words_to_translate_path, "r", encoding="utf-8") as words_file:
    json_data = json.load(words_file)

# The input file is a JSON array of objects; only each object's "word" value
# is needed.
word_list = [item["word"] for item in json_data]

# Start from any translations saved by an earlier run; fall back to an empty
# mapping when no output file exists yet.
# NOTE(review): presumably translate_to_other_languages uses this to skip
# words that are already translated - confirm against scribe_data.utils.
translations = {}
translated_words_path = os.path.join(
    translate_script_dir, "../formatted_data/translated_words.json"
)
if os.path.exists(translated_words_path):
    with open(translated_words_path, "r", encoding="utf-8") as translated_file:
        translations = json.load(translated_file)

translate_to_other_languages(
    source_language=SRC_LANG,
    word_list=word_list,
    translations=translations,
    batch_size=100,
)

0 comments on commit 400a0c3

Please sign in to comment.