Skip to content

Commit

Permalink
test 2
Browse files Browse the repository at this point in the history
  • Loading branch information
DJump13 committed May 9, 2024
1 parent fef2cc5 commit 348f0fc
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 0 deletions.
64 changes: 64 additions & 0 deletions translator/libargos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import argostranslate.package
import argostranslate.settings
import os
from pathlib import Path

# Base directory for every model path below: the process's current working
# directory at import time, so paths resolve relative to where the script is
# launched from.
PATH = os.getcwd()
# Directory that holds the downloaded .argosmodel files and from which they
# are installed.
ARGOS_PACKAGES_DIR=Path(f"{PATH}/translator/argos_models")

#Download all available packages
def download_packages():
    """Download every available Argos package that is not yet on disk.

    Points Argos Translate's download directory at ``ARGOS_PACKAGES_DIR``
    (creating it, including missing parent directories, when necessary),
    refreshes the package index and downloads each available package whose
    ``<code>.argosmodel`` file is not already present in that directory.
    """
    # makedirs(..., exist_ok=True) also works when the parent directory is
    # missing and cannot fail if the directory appears between a separate
    # existence check and the creation call.
    os.makedirs(ARGOS_PACKAGES_DIR, exist_ok=True)
    argostranslate.settings.downloads_dir = ARGOS_PACKAGES_DIR
    argostranslate.package.update_package_index()

    # List the directory once instead of once per available package.
    present = set(os.listdir(ARGOS_PACKAGES_DIR))
    for package in argostranslate.package.get_available_packages():
        package_name = package.code + ".argosmodel"
        if package_name not in present:
            package.download()
            present.add(package_name)


#returns list of installed_packages with names in format matching the filenames
def get_installed_package_names():
    """Return the model filename corresponding to each installed Argos package.

    Every name is built as ``translate-<from_code>_<to_code>.argosmodel`` so
    it can be compared directly against filenames in the models directory.
    """
    return [
        f"translate-{pkg.from_code}_{pkg.to_code}.argosmodel"
        for pkg in argostranslate.package.get_installed_packages()
    ]

#Installs all packages from local directory
def install_packages():
    """Install every not-yet-installed model file found in ``ARGOS_PACKAGES_DIR``.

    Returns:
        The installed packages as reported by
        ``argostranslate.package.get_installed_packages()`` once the
        installation pass has finished.
    """
    already_installed = set(get_installed_package_names())
    for filename in os.listdir(ARGOS_PACKAGES_DIR):
        # Only ``.argosmodel`` archives are packages; other entries that may
        # sit in the directory (hidden files, leftovers) must not be handed
        # to install_from_path.
        if not filename.endswith(".argosmodel"):
            continue
        if filename not in already_installed:
            argostranslate.package.install_from_path(
                os.path.join(ARGOS_PACKAGES_DIR, filename)
            )
    return argostranslate.package.get_installed_packages()

#Uninstall all packages
def uninstall_all_packages():
    """Uninstall every Argos package that is currently installed."""
    for pkg in argostranslate.package.get_installed_packages():
        argostranslate.package.uninstall(pkg)

#update all installed packages
def update_packages():
    """Install pending local models, then call ``update()`` on each installed package."""
    for pkg in install_packages():
        pkg.update()

#displays all installed packages
def display_installed_packages():
    """Print one ``code, name`` line per language covered by installed packages.

    Pending local models are installed first. Each language code is printed
    once, in the order in which it is first encountered.
    """
    languages = {}
    for pkg in install_packages():
        languages[pkg.to_code] = pkg.to_name
        languages[pkg.from_code] = pkg.from_name
    for code, name in languages.items():
        print(f"{code}, {name}")

if __name__ == "__main__":
    # Fetch missing packages first; the listing step installs whatever is in
    # the models directory before printing.
    download_packages()
    display_installed_packages()

22 changes: 22 additions & 0 deletions translator/libmarian.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from transformers import MarianMTModel, MarianTokenizer
from typing import Sequence
import os
# Base directory for the Marian model paths below: the working directory at
# import time.
PATH = os.getcwd()

def download_package(src, dst):
    """Download the ``Helsinki-NLP/opus-mt-<src>-<dst>`` model and save it locally.

    Tokenizer and model are stored under
    ``<PATH>/translator/marian_models/opus-mt-<src>-<dst>``. Nothing is saved
    unless both the tokenizer and the model were fetched successfully. An
    ``OSError`` raised along the way is reported by printing
    "Package not found".

    Args:
        src: Source language code used in the model name.
        dst: Target language code used in the model name.
    """
    print(f"Downloading {src}-{dst}...")
    model_name = f"Helsinki-NLP/opus-mt-{src}-{dst}"
    target_dir = f"{PATH}/translator/marian_models/opus-mt-{src}-{dst}"
    try:
        # Fetch both artifacts before writing anything: if the model fetch
        # failed after the tokenizer had been saved, the half-populated
        # directory would make package_downloaded() report the package as
        # present.
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer.save_pretrained(target_dir)
        model.save_pretrained(target_dir)
    except OSError:
        print("Package not found")

def package_downloaded(src, dst):
    """Report whether an ``opus-mt-<src>-<dst>`` entry exists in ``marian_models``.

    The ``marian_models`` directory is created first when it is missing, so
    listing it cannot fail because the directory is absent.
    """
    models_dir = f"{PATH}/translator/marian_models"
    os.makedirs(models_dir, exist_ok=True)
    return f"opus-mt-{src}-{dst}" in os.listdir(models_dir)
33 changes: 33 additions & 0 deletions translator/optimizeLanguage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import sys
import libargos as argos
import libmarian as marian

def optimize_path(src, dest):
    """Make sure a model for translating ``src`` -> ``dest`` is available locally.

    All available Argos packages are downloaded and installed first. If none
    of them covers the pair directly and no Marian model for it is on disk
    yet, the Marian model is downloaded.
    """
    # Download all available Argos packages, then install them.
    argos.download_packages()
    installed = argos.install_packages()

    # Nothing more to do when Argos already covers the pair directly.
    if any(pkg.from_code == src and pkg.to_code == dest for pkg in installed):
        return

    # Fall back to Marian, downloading only when it is not present yet.
    if not marian.package_downloaded(src, dest):
        marian.download_package(src, dest)

def main():
    """Optimize translation in both directions between two language codes.

    The codes are read from ``sys.argv[1]`` and ``sys.argv[2]``. When fewer
    than two codes are supplied the process exits with a usage message
    instead of failing with an ``IndexError``.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: optimizeLanguage.py <lang_one> <lang_two>")
    lang_one = sys.argv[1]
    lang_two = sys.argv[2]
    optimize_path(lang_one, lang_two)
    optimize_path(lang_two, lang_one)
    print(f"{lang_one} and {lang_two} Optimized")


if __name__ == "__main__":
    main()



107 changes: 107 additions & 0 deletions translator/server.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import os
import sys
import json
import argostranslate.package
import argostranslate.translate
from functools import cached_property
from http.server import BaseHTTPRequestHandler
from urllib.parse import parse_qsl, urlparse
from http.server import HTTPServer
from pathlib import Path
from transformers import MarianMTModel, MarianTokenizer
from typing import Sequence
from libargos import install_packages
import socket
import time

# Candidate ports; the entry point walks them in order and serves on the
# first one that passes port_open().
PORTS = [8000, 5000, 8001, 8002, 8003, 8004, 8005, 8006, 8007, 8008]
# Seconds the server waits for a request before it shuts itself down.
TIMEOUT = 3600
# Base directory for model paths: the working directory at import time.
PATH = os.getcwd()



class MarianModel:
    """Marian MT model and tokenizer for one language pair, loaded from disk."""

    def __init__(self, source_lang: str, dest_lang: str) -> None:
        """Load model and tokenizer from ``marian_models/opus-mt-<source>-<dest>``.

        Both loaders are called with ``local_files_only=True``.
        """
        model_dir = f"{PATH}/translator/marian_models/opus-mt-{source_lang}-{dest_lang}"
        self.model = MarianMTModel.from_pretrained(model_dir, local_files_only=True)
        self.tokenizer = MarianTokenizer.from_pretrained(model_dir, local_files_only=True)

    def translate(self, texts: Sequence[str]) -> Sequence[str]:
        """Translate every string in ``texts`` and return the results in order."""
        batch = self.tokenizer(list(texts), return_tensors="pt", padding=True)
        generated = self.model.generate(**batch)
        decode = self.tokenizer.decode
        return [decode(token_ids, skip_special_tokens=True) for token_ids in generated]

class WebRequestHandler(BaseHTTPRequestHandler):
    """HTTP handler answering ``GET /?text=...&from=...&to=...`` with a translation.

    The response body is a JSON object with a ``translate_data`` key; a
    rejected request additionally carries an ``error`` key.
    """

    # Query parameters a translation request has to carry.
    REQUIRED_PARAMS = ("text", "from", "to")

    @cached_property
    def url(self):
        """Parsed form of the request path."""
        return urlparse(self.path)

    @cached_property
    def query_data(self):
        """Query-string parameters as a dict (empty when there is no query)."""
        return dict(parse_qsl(self.url.query))

    @staticmethod
    def _argos_model_exists(from_code, to_code):
        """Return True if the Argos model file for ``from_code`` -> ``to_code`` is on disk."""
        return Path(
            f"{PATH}/translator/argos_models/translate-{from_code}_{to_code}.argosmodel"
        ).exists()

    @cached_property
    def translate_data(self):
        """Translate the requested text with the first backend that has a model.

        Order of preference: a direct Argos model, a Marian model, then Argos
        pivoting through English. When none applies, a
        "Translation Unavailable:" string is returned instead.

        Raises:
            KeyError: if ``text``, ``from`` or ``to`` is missing from the query.
        """
        text = self.query_data['text']
        from_code = self.query_data['from']
        to_code = self.query_data['to']

        # Use Argos if a direct language package exists
        if self._argos_model_exists(from_code, to_code):
            return argostranslate.translate.translate(text, from_code, to_code)
        # Use Marian if the pair exists in Marian but not in Argos
        if Path(f"{PATH}/translator/marian_models/opus-mt-{from_code}-{to_code}").exists():
            marian = MarianModel(from_code, to_code)
            return marian.translate([text])[0]
        # Argos "English in the middle": a pivot needs source -> en AND
        # en -> target; two models pointing the same way cannot be chained.
        if self._argos_model_exists(from_code, "en") and self._argos_model_exists("en", to_code):
            return argostranslate.translate.translate(text, from_code, to_code)
        # No package covers this pair
        return "Translation Unavailable:" + from_code + to_code

    def do_GET(self):
        """Answer a GET request with the JSON payload.

        A request without any query parameters gets ``200`` and an empty
        ``translate_data``. A request that carries some parameters but lacks
        one of ``REQUIRED_PARAMS`` gets ``400`` with an ``error`` message.
        """
        missing = [name for name in self.REQUIRED_PARAMS if name not in self.query_data]
        if missing and self.query_data:
            status = 400
            body = json.dumps(
                {
                    "translate_data": "",
                    "error": "Missing query parameter(s): " + ", ".join(missing),
                },
                ensure_ascii=False,
            )
        else:
            # Build the body before any header goes out so that a failure
            # while translating cannot follow an already-sent 200 status.
            status = 200
            body = self.get_response()

        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(body.encode("utf-8"))

    def get_response(self):
        """Return the JSON body; ``translate_data`` is "" when there is no query."""
        return json.dumps(
            {
                "translate_data": self.translate_data if self.query_data else "",
            },
            ensure_ascii=False
        )


def port_open(port):
    """Return True when nothing accepts a TCP connection on ``localhost:port``.

    ``connect_ex`` returns 0 when the connection succeeds, which means a
    listener already occupies the port; any other result is treated as the
    port being available.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        status = probe.connect_ex(('localhost', port))
    finally:
        probe.close()
    return status != 0

def start_server(port):
    """Serve translation requests on ``127.0.0.1:port`` until idle for ``TIMEOUT`` seconds.

    Requests are handled one at a time in an endless loop. When no request
    arrives within ``TIMEOUT`` seconds the listening socket is closed,
    "Translation server timed out" is printed and the process exits through
    ``sys.exit()``.

    Args:
        port: TCP port to bind on the loopback interface.
    """
    def _raise_timeout():
        # The server calls handle_timeout() when handle_request() has waited
        # longer than server.timeout; raising breaks out of the serving loop.
        raise TimeoutError()

    try:
        # The context manager closes the listening socket on every exit path.
        with HTTPServer(("127.0.0.1", port), WebRequestHandler) as server:
            server.timeout = TIMEOUT
            server.handle_timeout = _raise_timeout
            print(f"Serving at port: {port}", file=sys.stderr)
            print(f"Server started at {time.strftime('%I:%M')} with timeout: {TIMEOUT} seconds", file=sys.stderr)
            while True:
                server.handle_request()
    except TimeoutError:
        print("Translation server timed out")
        sys.exit()

if __name__ == "__main__":
    install_packages()
    # Serve on the first candidate port that nothing is listening on.
    # start_server() blocks in its request loop and ends the process when it
    # times out.
    for port in PORTS:
        if not port_open(port):
            continue
        start_server(port)


0 comments on commit 348f0fc

Please sign in to comment.