diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 1faf672d..9c4a3f05 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -15,9 +15,22 @@ jobs: name: pypi url: https://pypi.org/project/deepparse/ permissions: - id-token: write + id-token: write + steps: - # retrieve your distributions here + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel + + - name: Build and publish + run: + DEEPPARSE_RELEASE_BUILD=1 python setup.py sdist bdist_wheel - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d9e47b8..1ca798c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -356,4 +356,12 @@ - Fix Sentry version error in Docker Image. +## 0.9.12 + +- Bug-fix the call to the BPEmb class instead of the BPEmbBaseURLWrapperBugFix to fix the download URL in `download_models`. + +## 0.9.13 + +- Update Gensim version in setup to allow for the installation of recent Scipy versions. 
+ ## dev \ No newline at end of file diff --git a/deepparse/__init__.py b/deepparse/__init__.py index 54ba23f1..23a31899 100644 --- a/deepparse/__init__.py +++ b/deepparse/__init__.py @@ -1,4 +1,5 @@ # pylint: disable=wildcard-import +from .bpemb_url_bug_fix import * from .download_tools import * from .validations import * from .version import __version__ diff --git a/deepparse/bpemb_url_bug_fix.py b/deepparse/bpemb_url_bug_fix.py new file mode 100644 index 00000000..dd9476eb --- /dev/null +++ b/deepparse/bpemb_url_bug_fix.py @@ -0,0 +1,13 @@ +""" +Due to an error in the BPEmb base URL to download the weights, and since the authors and maintainers do not respond or +seem to maintain the project, we use a wrapper to fix the URL. +However, the wrapper must be placed here to avoid a circular import. +""" + +from bpemb import BPEmb + + +class BPEmbBaseURLWrapperBugFix(BPEmb): + def __init__(self, **kwargs): + self.base_url = "https://bpemb.h-its.org/multi/" + super().__init__(**kwargs) diff --git a/deepparse/download_tools.py b/deepparse/download_tools.py index 035c3a47..0355542e 100644 --- a/deepparse/download_tools.py +++ b/deepparse/download_tools.py @@ -8,11 +8,11 @@ from urllib.request import urlopen import requests -from bpemb import BPEmb from fasttext.FastText import _FastText from requests import HTTPError from urllib3.exceptions import MaxRetryError +from .bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix from .errors.server_error import ServerError BASE_URL = "https://graal.ift.ulaval.ca/public/deepparse/{}.{}" @@ -127,7 +127,7 @@ def download_model( elif model_type == "fasttext-light": download_fasttext_magnitude_embeddings(cache_dir=saving_cache_path) elif "bpemb" in model_type: - BPEmb( + BPEmbBaseURLWrapperBugFix( lang="multi", vs=100000, dim=300, cache_dir=saving_cache_path ) # The class manages the download of the pretrained words embedding diff --git a/deepparse/embeddings_models/bpemb_embeddings_model.py 
b/deepparse/embeddings_models/bpemb_embeddings_model.py index 4697b175..4761de94 100644 --- a/deepparse/embeddings_models/bpemb_embeddings_model.py +++ b/deepparse/embeddings_models/bpemb_embeddings_model.py @@ -3,19 +3,13 @@ from pathlib import Path import requests -from bpemb import BPEmb from numpy.core.multiarray import ndarray from urllib3.exceptions import InsecureRequestWarning +from ..bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix from .embeddings_model import EmbeddingsModel -class BPEmbBaseURLWrapperBugFix(BPEmb): - def __init__(self, **kwargs): - self.base_url = "https://bpemb.h-its.org/multi/" - super().__init__(**kwargs) - - class BPEmbEmbeddingsModel(EmbeddingsModel): """ BPEmb embeddings network from `BPEmb: Tokenization-free Pre-trained Subword Embeddings in 275 Languages diff --git a/requirements.txt b/requirements.txt index f93a8e98..aa294ec7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ torch bpemb numpy<2.0.0 -scipy<=1.10.1 +scipy requests pymagnitude-light colorama>=0.4.3 poutyne -gensim>=4.2.0 +gensim>=4.3.3 fasttext-wheel pandas urllib3 diff --git a/setup.cfg b/setup.cfg index e20fbfde..6e787448 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -description-file = README.md +description_file = README.md [options.entry_points] console_scripts = diff --git a/setup.py b/setup.py index 815e8621..12f39d70 100644 --- a/setup.py +++ b/setup.py @@ -72,8 +72,8 @@ def main(): "numpy<2.0.0", "torch", "bpemb", - "scipy<=1.10.1", - "gensim>=4.0.0", + "scipy", + "gensim>=4.3.3", "requests", "fasttext-wheel", "pymagnitude-light", @@ -90,7 +90,7 @@ def main(): "colorama": "colorama", "app": ["fastapi[all]>=0.109.1", "uvicorn==0.22.0", "sentry-sdk[fastapi]>=2.0.0", "python-decouple==3.8"], "all": [ - "colorama>", # colorama + "colorama", # colorama "fastapi[all]>=0.109.1", # app requirements "uvicorn==0.22.0", "sentry-sdk[fastapi]>=2.0.0", diff --git a/tests/cli/test_download_model.py 
b/tests/cli/test_download_model.py index 8df88984..efb824dc 100644 --- a/tests/cli/test_download_model.py +++ b/tests/cli/test_download_model.py @@ -60,7 +60,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi @patch("deepparse.download_tools.download_weights") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): - with patch("deepparse.download_tools.BPEmb") as downloader: + with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader: download_model_cli_main([self.a_bpemb_model_type]) downloader.assert_called() @@ -95,7 +95,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -104,7 +104,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl downloader.assert_called() downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -139,7 +139,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe downloader.assert_called() 
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights( @@ -179,7 +179,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting downloader.assert_not_called() - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=True) # the latest version def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting( @@ -218,7 +218,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights( self, download_embeddings_mock, os_is_file_mock diff --git a/tests/cli/test_download_models.py b/tests/cli/test_download_models.py index 114fcd84..26a967af 100644 --- a/tests/cli/test_download_models.py +++ b/tests/cli/test_download_models.py @@ -31,7 +31,7 @@ def tearDown(self) -> None: @patch("deepparse.download_tools.download_weights") @patch("deepparse.download_tools.download_fasttext_embeddings") @patch("deepparse.download_tools.download_fasttext_magnitude_embeddings") - @patch("deepparse.download_tools.BPEmb") + 
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels( self, BPEmb_mock, diff --git a/tests/test_download_tools.py b/tests/test_download_tools.py index 202c7c89..9469fbe2 100644 --- a/tests/test_download_tools.py +++ b/tests/test_download_tools.py @@ -352,7 +352,7 @@ def tearDown(self) -> None: @patch("deepparse.download_tools.download_weights") @patch("deepparse.download_tools.download_fasttext_embeddings") @patch("deepparse.download_tools.download_fasttext_magnitude_embeddings") - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels( self, BPEmb_mock, @@ -428,7 +428,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi @patch("deepparse.download_tools.download_weights") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): - with patch("deepparse.download_tools.BPEmb") as downloader: + with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader: download_model(self.a_bpemb_model_type) downloader.assert_called() @@ -463,7 +463,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -472,7 +472,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl 
downloader.assert_called() downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -507,7 +507,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights( @@ -547,7 +547,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting downloader.assert_not_called() - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=True) # the latest version def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting( @@ -586,7 +586,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local def 
test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights( self, download_embeddings_mock, os_is_file_mock diff --git a/version.txt b/version.txt index 6889a311..6af8ded7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.9.11 \ No newline at end of file +0.9.13 \ No newline at end of file