Skip to content

Commit

Permalink
fix the use of BPEmb in download models
Browse files Browse the repository at this point in the history
  • Loading branch information
davebulaval committed Jul 9, 2024
1 parent bd04738 commit e44a7ce
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 23 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -356,4 +356,8 @@

- Fix Sentry version error in Docker Image.

## 0.9.12

- Fix `download_models` calling the `BPEmb` class instead of `BPEmbBaseURLWrapperBugFix`, so that the corrected download URL is used.

## dev
1 change: 1 addition & 0 deletions deepparse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# pylint: disable=wildcard-import
from .bpemb_url_bug_fix import *
from .download_tools import *
from .validations import *
from .version import __version__
Expand Down
13 changes: 13 additions & 0 deletions deepparse/bpemb_url_bug_fix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Due to an error in the BPEmb base URL used to download the weights, and since the authors and maintainers do not
respond or seem to maintain the project, we use a wrapper that overrides the URL to fix it.
However, the wrapper must be placed here to avoid a circular import.
"""

from bpemb import BPEmb


class BPEmbBaseURLWrapperBugFix(BPEmb):
    """
    Thin ``BPEmb`` subclass that sets a replacement download base URL on the instance.

    The ``base_url`` attribute is assigned before delegating to the parent initializer, so the
    override is already in place while ``BPEmb.__init__`` runs.
    """

    def __init__(self, **kwargs):
        """
        Set the replacement base URL, then forward every keyword argument to ``BPEmb``.

        :param kwargs: Keyword arguments passed unchanged to the parent ``BPEmb`` initializer.
        """
        # Order matters: the attribute is set first, the parent initializer runs second.
        self.base_url = "https://bpemb.h-its.org/multi/"
        super().__init__(**kwargs)
2 changes: 1 addition & 1 deletion deepparse/download_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from requests import HTTPError
from urllib3.exceptions import MaxRetryError

from .embeddings_models import BPEmbBaseURLWrapperBugFix
from .bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix
from .errors.server_error import ServerError

BASE_URL = "https://graal.ift.ulaval.ca/public/deepparse/{}.{}"
Expand Down
8 changes: 1 addition & 7 deletions deepparse/embeddings_models/bpemb_embeddings_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,13 @@
from pathlib import Path

import requests
from bpemb import BPEmb
from numpy.core.multiarray import ndarray
from urllib3.exceptions import InsecureRequestWarning

from ..bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix
from .embeddings_model import EmbeddingsModel


# BPEmb subclass that sets a replacement ``base_url`` on the instance before the parent initializer runs.
class BPEmbBaseURLWrapperBugFix(BPEmb):
# Accepts arbitrary keyword arguments and forwards them unchanged to the parent initializer.
def __init__(self, **kwargs):
# Assigned before super().__init__ so the override is already set when the parent initializer executes.
self.base_url = "https://bpemb.h-its.org/multi/"
super().__init__(**kwargs)


class BPEmbEmbeddingsModel(EmbeddingsModel):
"""
BPEmb embeddings network from `BPEmb: Tokenization-free Pre-trained Subword Embeddings in 275 Languages
Expand Down
12 changes: 6 additions & 6 deletions tests/cli/test_download_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi
@patch("deepparse.download_tools.download_weights")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.BPEmb") as downloader:
with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader:
download_model_cli_main([self.a_bpemb_model_type])

downloader.assert_called()
Expand Down Expand Up @@ -95,7 +95,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand All @@ -104,7 +104,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl
downloader.assert_called()
downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version
def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights(
Expand Down Expand Up @@ -179,7 +179,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting

downloader.assert_not_called()

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=True) # the latest version
def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting(
Expand Down Expand Up @@ -218,7 +218,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local
def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights(
self, download_embeddings_mock, os_is_file_mock
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/test_download_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def tearDown(self) -> None:
@patch("deepparse.download_tools.download_weights")
@patch("deepparse.download_tools.download_fasttext_embeddings")
@patch("deepparse.download_tools.download_fasttext_magnitude_embeddings")
@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels(
self,
BPEmb_mock,
Expand Down
14 changes: 7 additions & 7 deletions tests/test_download_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def tearDown(self) -> None:
@patch("deepparse.download_tools.download_weights")
@patch("deepparse.download_tools.download_fasttext_embeddings")
@patch("deepparse.download_tools.download_fasttext_magnitude_embeddings")
@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels(
self,
BPEmb_mock,
Expand Down Expand Up @@ -428,7 +428,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi
@patch("deepparse.download_tools.download_weights")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.BPEmb") as downloader:
with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader:
download_model(self.a_bpemb_model_type)

downloader.assert_called()
Expand Down Expand Up @@ -463,7 +463,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand All @@ -472,7 +472,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl
downloader.assert_called()
downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand Down Expand Up @@ -507,7 +507,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version
def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights(
Expand Down Expand Up @@ -547,7 +547,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting

downloader.assert_not_called()

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=True) # the latest version
def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting(
Expand Down Expand Up @@ -586,7 +586,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local
def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights(
self, download_embeddings_mock, os_is_file_mock
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.11
0.9.12

0 comments on commit e44a7ce

Please sign in to comment.