From cf10bf9dc438cdcbe6383416e4b5df8f37027ad4 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Mon, 8 Jul 2024 22:51:17 -0400 Subject: [PATCH 1/6] fix setup tools deprecation warnings --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index e20fbfde..6e787448 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -description-file = README.md +description_file = README.md [options.entry_points] console_scripts = From 2e52c67d11b21543cc3c431987e56cd0e7e4fc11 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Mon, 8 Jul 2024 22:53:05 -0400 Subject: [PATCH 2/6] fix problem with dependencies build in setup --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 815e8621..77eb0562 100644 --- a/setup.py +++ b/setup.py @@ -90,7 +90,7 @@ def main(): "colorama": "colorama", "app": ["fastapi[all]>=0.109.1", "uvicorn==0.22.0", "sentry-sdk[fastapi]>=2.0.0", "python-decouple==3.8"], "all": [ - "colorama>", # colorama + "colorama", # colorama "fastapi[all]>=0.109.1", # app requirements "uvicorn==0.22.0", "sentry-sdk[fastapi]>=2.0.0", From bd04738a6caa7b091a4f3047d010e327f93a42be Mon Sep 17 00:00:00 2001 From: davebulaval Date: Tue, 9 Jul 2024 08:48:32 -0400 Subject: [PATCH 3/6] fix pypi workflow problem --- .github/workflows/python-publish.yml | 21 +++++++++++++++++---- deepparse/download_tools.py | 4 ++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 1faf672d..4c1bd4ef 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -15,9 +15,22 @@ jobs: name: pypi url: https://pypi.org/project/deepparse/ permissions: - id-token: write + id-token: write + steps: - # retrieve your distributions here + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + - name: Install 
dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel + + - name: Build and publish + run: + RISC_RELEASE_BUILD=1 python setup.py sdist bdist_wheel - - name: Publish package distributions to PyPI - uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file diff --git a/deepparse/download_tools.py b/deepparse/download_tools.py index 035c3a47..ca1f0a64 100644 --- a/deepparse/download_tools.py +++ b/deepparse/download_tools.py @@ -8,11 +8,11 @@ from urllib.request import urlopen import requests -from bpemb import BPEmb from fasttext.FastText import _FastText from requests import HTTPError from urllib3.exceptions import MaxRetryError +from .embeddings_models import BPEmbBaseURLWrapperBugFix from .errors.server_error import ServerError BASE_URL = "https://graal.ift.ulaval.ca/public/deepparse/{}.{}" @@ -127,7 +127,7 @@ def download_model( elif model_type == "fasttext-light": download_fasttext_magnitude_embeddings(cache_dir=saving_cache_path) elif "bpemb" in model_type: - BPEmb( + BPEmbBaseURLWrapperBugFix( lang="multi", vs=100000, dim=300, cache_dir=saving_cache_path ) # The class manages the download of the pretrained words embedding From e44a7ce5a8e329facd2dd632590c23cc66bb33f4 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Tue, 9 Jul 2024 09:04:15 -0400 Subject: [PATCH 4/6] fix the use of BPEmb in download models --- CHANGELOG.md | 4 ++++ deepparse/__init__.py | 1 + deepparse/bpemb_url_bug_fix.py | 13 +++++++++++++ deepparse/download_tools.py | 2 +- .../embeddings_models/bpemb_embeddings_model.py | 8 +------- tests/cli/test_download_model.py | 12 ++++++------ tests/cli/test_download_models.py | 2 +- tests/test_download_tools.py | 14 +++++++------- version.txt | 2 +- 9 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 deepparse/bpemb_url_bug_fix.py diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 8d9e47b8..2034fd23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -356,4 +356,8 @@ - Fix Sentry version error in Docker Image. +## 0.9.12 + +- Bug-fix the call to the BPEmb class instead of the BPEmbBaseURLWrapperBugFix to fix the download URL in `download_models`. + ## dev \ No newline at end of file diff --git a/deepparse/__init__.py b/deepparse/__init__.py index 54ba23f1..23a31899 100644 --- a/deepparse/__init__.py +++ b/deepparse/__init__.py @@ -1,4 +1,5 @@ # pylint: disable=wildcard-import +from .bpemb_url_bug_fix import * from .download_tools import * from .validations import * from .version import __version__ diff --git a/deepparse/bpemb_url_bug_fix.py b/deepparse/bpemb_url_bug_fix.py new file mode 100644 index 00000000..dd9476eb --- /dev/null +++ b/deepparse/bpemb_url_bug_fix.py @@ -0,0 +1,13 @@ +""" +Due to an error in the BPEmb base URL to download the weights, and since the authors and maintainer do not respond or +seem to maintain the project, we use a wrapper to bug-fix the URL to change it. +However, the wrapper must be placed here due to a circular import. 
+""" + +from bpemb import BPEmb + + +class BPEmbBaseURLWrapperBugFix(BPEmb): + def __init__(self, **kwargs): + self.base_url = "https://bpemb.h-its.org/multi/" + super().__init__(**kwargs) diff --git a/deepparse/download_tools.py b/deepparse/download_tools.py index ca1f0a64..0355542e 100644 --- a/deepparse/download_tools.py +++ b/deepparse/download_tools.py @@ -12,7 +12,7 @@ from requests import HTTPError from urllib3.exceptions import MaxRetryError -from .embeddings_models import BPEmbBaseURLWrapperBugFix +from .bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix from .errors.server_error import ServerError BASE_URL = "https://graal.ift.ulaval.ca/public/deepparse/{}.{}" diff --git a/deepparse/embeddings_models/bpemb_embeddings_model.py b/deepparse/embeddings_models/bpemb_embeddings_model.py index 4697b175..4761de94 100644 --- a/deepparse/embeddings_models/bpemb_embeddings_model.py +++ b/deepparse/embeddings_models/bpemb_embeddings_model.py @@ -3,19 +3,13 @@ from pathlib import Path import requests -from bpemb import BPEmb from numpy.core.multiarray import ndarray from urllib3.exceptions import InsecureRequestWarning +from ..bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix from .embeddings_model import EmbeddingsModel -class BPEmbBaseURLWrapperBugFix(BPEmb): - def __init__(self, **kwargs): - self.base_url = "https://bpemb.h-its.org/multi/" - super().__init__(**kwargs) - - class BPEmbEmbeddingsModel(EmbeddingsModel): """ BPEmb embeddings network from `BPEmb: Tokenization-free Pre-trained Subword Embeddings in 275 Languages diff --git a/tests/cli/test_download_model.py b/tests/cli/test_download_model.py index 8df88984..efb824dc 100644 --- a/tests/cli/test_download_model.py +++ b/tests/cli/test_download_model.py @@ -60,7 +60,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi @patch("deepparse.download_tools.download_weights") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock): with 
patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): - with patch("deepparse.download_tools.BPEmb") as downloader: + with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader: download_model_cli_main([self.a_bpemb_model_type]) downloader.assert_called() @@ -95,7 +95,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -104,7 +104,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl downloader.assert_called() downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -139,7 +139,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version def 
test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights( @@ -179,7 +179,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting downloader.assert_not_called() - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=True) # the latest version def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting( @@ -218,7 +218,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights( self, download_embeddings_mock, os_is_file_mock diff --git a/tests/cli/test_download_models.py b/tests/cli/test_download_models.py index 114fcd84..26a967af 100644 --- a/tests/cli/test_download_models.py +++ b/tests/cli/test_download_models.py @@ -31,7 +31,7 @@ def tearDown(self) -> None: @patch("deepparse.download_tools.download_weights") @patch("deepparse.download_tools.download_fasttext_embeddings") @patch("deepparse.download_tools.download_fasttext_magnitude_embeddings") - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels( self, BPEmb_mock, diff --git a/tests/test_download_tools.py b/tests/test_download_tools.py index 202c7c89..9469fbe2 100644 --- a/tests/test_download_tools.py +++ b/tests/test_download_tools.py @@ -352,7 +352,7 @@ def tearDown(self) -> None: 
@patch("deepparse.download_tools.download_weights") @patch("deepparse.download_tools.download_fasttext_embeddings") @patch("deepparse.download_tools.download_fasttext_magnitude_embeddings") - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels( self, BPEmb_mock, @@ -428,7 +428,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi @patch("deepparse.download_tools.download_weights") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): - with patch("deepparse.download_tools.BPEmb") as downloader: + with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader: download_model(self.a_bpemb_model_type) downloader.assert_called() @@ -463,7 +463,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -472,7 +472,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl downloader.assert_called() downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock): with patch("deepparse.download_tools.CACHE_PATH", 
self.fake_cache_dir): with patch("deepparse.download_tools.download_weights") as downloader: @@ -507,7 +507,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights( @@ -547,7 +547,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting downloader.assert_not_called() - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", return_value=True) @patch("deepparse.download_tools.latest_version", return_value=True) # the latest version def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting( @@ -586,7 +586,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa downloader.assert_called() downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir) - @patch("deepparse.download_tools.BPEmb") + @patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") @patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights( self, download_embeddings_mock, os_is_file_mock diff --git a/version.txt b/version.txt index 6889a311..bf1ba0c1 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.9.11 \ No newline at end of file +0.9.12 \ No newline at end of file From 6bb90d43c754ef5d2c4aeb7630c5ce588652f335 Mon Sep 17 00:00:00 2001 From: davebulaval Date: Tue, 9 
Jul 2024 12:27:15 -0400 Subject: [PATCH 5/6] fix error in publish variable --- .github/workflows/python-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 4c1bd4ef..9c4a3f05 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -30,7 +30,7 @@ jobs: - name: Build and publish run: - RISC_RELEASE_BUILD=1 python setup.py sdist bdist_wheel + DEEPPARSE_RELEASE_BUILD=1 python setup.py sdist bdist_wheel - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 \ No newline at end of file From 559bad244db37d486576e51c99e69976180af538 Mon Sep 17 00:00:00 2001 From: Marouane Yassine <46830666+MAYAS3@users.noreply.github.com> Date: Thu, 12 Sep 2024 18:02:43 -0400 Subject: [PATCH 6/6] Setup update (#236) * update gensim version * Update changelog and version * Update gensim version in requirement.txt * Add dev section to changelog --- CHANGELOG.md | 4 ++++ requirements.txt | 4 ++-- setup.py | 4 ++-- version.txt | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2034fd23..1ca798c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -360,4 +360,8 @@ - Bug-fix the call to the BPEmb class instead of the BPEmbBaseURLWrapperBugFix to fix the download URL in `download_models`. +## 0.9.13 + +- Update Gensim version in setup to allow for the installation of recent Scipy versions. 
+ ## dev \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f93a8e98..aa294ec7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ torch bpemb numpy<2.0.0 -scipy<=1.10.1 +scipy requests pymagnitude-light colorama>=0.4.3 poutyne -gensim>=4.2.0 +gensim>=4.3.3 fasttext-wheel pandas urllib3 diff --git a/setup.py b/setup.py index 77eb0562..12f39d70 100644 --- a/setup.py +++ b/setup.py @@ -72,8 +72,8 @@ def main(): "numpy<2.0.0", "torch", "bpemb", - "scipy<=1.10.1", - "gensim>=4.0.0", + "scipy", + "gensim>=4.3.3", "requests", "fasttext-wheel", "pymagnitude-light", diff --git a/version.txt b/version.txt index bf1ba0c1..6af8ded7 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.9.12 \ No newline at end of file +0.9.13 \ No newline at end of file