Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🔶 Add asyncIO feature for optimization of batch_translate #202

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions deep_translator/async_requests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import Optional

import aiohttp

from deep_translator.exceptions import RequestError, TooManyRequests


async def async_get_request(
    session: "aiohttp.ClientSession",
    url: str,
    params: Optional[dict] = None,
    proxies: Optional[dict] = None,
) -> str:
    """
    send an asynchronous GET request and return the response body as text
    @param session: aiohttp ClientSession used to perform the request
    @param url: url to request
    @param params: optional query parameters appended to the url
    @param proxies: optional mapping of url scheme to proxy url,
        e.g. {"https": "http://host:port"}
    @return: the response body decoded as text
    @raise TooManyRequests: if the server answers with status 429
    @raise RequestError: if the server answers with any other non-200 status
    """
    # aiohttp expects a single proxy url per request instead of a
    # scheme -> url mapping, so pick the entry that matches the url scheme
    proxy = None
    if proxies:
        scheme = url.split("://", 1)[0].lower()
        proxy = proxies.get(scheme)

    async with session.get(url=url, params=params, proxy=proxy) as response:
        if response.status == 429:
            raise TooManyRequests()

        if response.status != 200:
            raise RequestError()

        return await response.text()
32 changes: 32 additions & 0 deletions deep_translator/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

import asyncio
from abc import ABC, abstractmethod
from functools import lru_cache
from pathlib import Path
from typing import List, Optional, Union

import aiohttp
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You will need a try/except here once you make this dependency optional, or you could just move the import inside the function where it is used. For an example, check out how the docx and pypdf dependencies are handled.


from deep_translator.constants import GOOGLE_LANGUAGES_TO_CODES
from deep_translator.exceptions import (
InvalidSourceOrTargetLanguage,
Expand Down Expand Up @@ -128,6 +132,23 @@ def translate(self, text: str, **kwargs) -> str:
"""
return NotImplemented("You need to implement the translate method!")

@abstractmethod
@lru_cache(maxsize=128)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to put the cache_maxsize value in a separate global config.py file.

async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
) -> str:
"""
translate a text using a async_translator under the hood and return
the translated text
@param text: text to translate
@param session: a network ClientSession object of anyiohttp
@param kwargs: additional arguments
@return: str
"""
return NotImplemented(
"You need to implement the _async_translate method!"
)

def _read_docx(self, f: str):
import docx2txt

Expand Down Expand Up @@ -181,3 +202,14 @@ def _translate_batch(self, batch: List[str], **kwargs) -> List[str]:
translated = self.translate(text, **kwargs)
arr.append(translated)
return arr

async def async_translate_batch(
self, batch: List[str], **kwargs
) -> List[str]:
if not batch:
raise Exception("Enter your text list that you want to translate")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add this custom exception to exceptions.py just to keep everything consistent? Something like a NotValidInputBatch exception

async with aiohttp.ClientSession() as session:
translation_tasks = [
self._async_translate(text, session) for text in batch
]
return await asyncio.gather(*translation_tasks)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can get a ValueError here if translation_tasks is empty. You may want to add a check for that

8 changes: 8 additions & 0 deletions deep_translator/deepl.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -87,6 +89,12 @@ def translate(self, text: str, **kwargs) -> str:
# Process and return the response.
return res["translations"][0]["text"]

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
    """
    translate directly from file
    @param path: path to the file to translate
    @param kwargs: additional arguments forwarded to _translate_file
    @return: whatever self._translate_file returns for this path
    """
    translated = self._translate_file(path, **kwargs)
    return translated

Expand Down
53 changes: 53 additions & 0 deletions deep_translator/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests
from bs4 import BeautifulSoup

from deep_translator.async_requests import async_get_request
from deep_translator.base import BaseTranslator
from deep_translator.constants import BASE_URLS
from deep_translator.exceptions import (
Expand Down Expand Up @@ -120,3 +123,53 @@ def translate_batch(self, batch: List[str], **kwargs) -> List[str]:
@return: list of translations
"""
return self._translate_batch(batch, **kwargs)

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
if is_input_valid(text):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since most of the code in this function is the same as the non-async translate, maybe you can find a way to make some parts reusable

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nidhaloff I'll try to keep all of these things in mind and fix them. Thanks for your detailed explanation.

text = text.strip()
if self._same_source_target() or is_empty(text):
return text
self._url_params["tl"] = self._target
self._url_params["sl"] = self._source

if self.payload_key:
self._url_params[self.payload_key] = text

response_text = await async_get_request(
session,
url=self._base_url,
params=self._url_params,
proxies=self.proxies,
)

soup = BeautifulSoup(response_text, "html.parser")

element = soup.find(self._element_tag, self._element_query)

if not element:
element = soup.find(self._element_tag, self._alt_element_query)
if not element:
raise TranslationNotFound(text)
if element.get_text(strip=True) == text.strip():
to_translate_alpha = "".join(
ch for ch in text.strip() if ch.isalnum()
)
translated_alpha = "".join(
ch for ch in element.get_text(strip=True) if ch.isalnum()
)
if (
to_translate_alpha
and translated_alpha
and to_translate_alpha == translated_alpha
):
self._url_params["tl"] = self._target
if "hl" not in self._url_params:
return text.strip()
del self._url_params["hl"]
return self.translate(text)

else:
return element.get_text(strip=True)
8 changes: 8 additions & 0 deletions deep_translator/libre.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
"""
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -95,6 +97,12 @@ def translate(self, text: str, **kwargs) -> str:
# Process and return the response.
return res["translatedText"]

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
"""
translate directly from file
Expand Down
8 changes: 8 additions & 0 deletions deep_translator/linguee.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional, Union

import aiohttp
import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri
Expand Down Expand Up @@ -98,6 +100,12 @@ def translate(

return filtered_elements if return_all else filtered_elements[0]

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_words(self, words: List[str], **kwargs) -> List[str]:
"""
translate a batch of words together by providing them in a list
Expand Down
8 changes: 8 additions & 0 deletions deep_translator/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

import logging
import sys
from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -111,6 +113,12 @@ def translate(self, text: str, **kwargs) -> str:
]
return "\n".join(all_translations)

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
"""
translate from a file
Expand Down
8 changes: 8 additions & 0 deletions deep_translator/mymemory.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional, Union

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -93,6 +95,12 @@ def translate(
next_match = next(matches)
return next_match if not return_all else list(all_matches)

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
"""
translate directly from file
Expand Down
8 changes: 8 additions & 0 deletions deep_translator/papago.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

import json
from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -82,6 +84,12 @@ def translate(self, text: str, **kwargs) -> str:
translated_text = result.get("translatedText")
return translated_text

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
"""
translate directly from file
Expand Down
8 changes: 8 additions & 0 deletions deep_translator/pons.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional, Union

import aiohttp
import requests
from bs4 import BeautifulSoup
from requests.utils import requote_uri
Expand Down Expand Up @@ -100,6 +102,12 @@ def translate(

return word_list if return_all else word_list[0]

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_words(self, words: List[str], **kwargs) -> List[str]:
"""
translate a batch of words together by providing them in a list
Expand Down
8 changes: 8 additions & 0 deletions deep_translator/qcri.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -95,6 +97,12 @@ def translate(self, text: str, **kwargs) -> str:
raise TranslationNotFound(text)
return translation

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
    """
    translate directly from file
    @param path: path to the file to translate
    @param kwargs: additional arguments forwarded to _translate_file
    @return: whatever self._translate_file returns for this path
    """
    translated = self._translate_file(path, **kwargs)
    return translated

Expand Down
8 changes: 8 additions & 0 deletions deep_translator/yandex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

__copyright__ = "Copyright (C) 2020 Nidhal Baccouri"

from functools import lru_cache
from typing import List, Optional

import aiohttp
import requests

from deep_translator.base import BaseTranslator
Expand Down Expand Up @@ -139,6 +141,12 @@ def translate(

return response["text"]

@lru_cache(maxsize=None)
async def _async_translate(
self, text: str, session: aiohttp.ClientSession, **kwargs
):
...

def translate_file(self, path: str, **kwargs) -> str:
"""
translate from a file
Expand Down
Loading