Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/GRAAL-Research/deepparse in…
Browse files Browse the repository at this point in the history
…to stable
  • Loading branch information
MAYAS3 committed Sep 12, 2024
2 parents cc07b1c + 559bad2 commit 51029ac
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 34 deletions.
21 changes: 17 additions & 4 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,22 @@ jobs:
name: pypi
url: https://pypi.org/project/deepparse/
permissions:
id-token: write
id-token: write

steps:
# retrieve your distributions here
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel
- name: Build and publish
run:
DEEPPARSE_RELEASE_BUILD=1 python setup.py sdist bdist_wheel

- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -356,4 +356,12 @@

- Fix Sentry version error in Docker Image.

## 0.9.12

- Fix the download URL in `download_models` by calling `BPEmbBaseURLWrapperBugFix` instead of the `BPEmb` class.

## 0.9.13

- Update Gensim version in setup to allow for the installation of recent Scipy versions.

## dev
1 change: 1 addition & 0 deletions deepparse/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# pylint: disable=wildcard-import
from .bpemb_url_bug_fix import *
from .download_tools import *
from .validations import *
from .version import __version__
Expand Down
13 changes: 13 additions & 0 deletions deepparse/bpemb_url_bug_fix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Due to an error in the BPEmb base URL used to download the weights, and since the authors and maintainers neither
respond nor seem to maintain the project, we use a wrapper class that overrides the URL.
The wrapper must be placed in this module to avoid a circular import.
"""

from bpemb import BPEmb


class BPEmbBaseURLWrapperBugFix(BPEmb):
    """
    Wrapper around ``BPEmb`` that overrides the base URL used to download the weights.

    The only difference from ``BPEmb`` is that ``base_url`` is set on the instance before the parent
    constructor is called.

    Args:
        **kwargs: Keyword arguments forwarded unchanged to ``BPEmb.__init__``.
    """

    def __init__(self, **kwargs):
        # Assign the corrected URL first so it is already in place when the parent constructor runs.
        # NOTE(review): presumably the parent constructor reads ``base_url`` while fetching the files -- confirm
        # against the installed bpemb version.
        self.base_url = "https://bpemb.h-its.org/multi/"
        super().__init__(**kwargs)
4 changes: 2 additions & 2 deletions deepparse/download_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from urllib.request import urlopen

import requests
from bpemb import BPEmb
from fasttext.FastText import _FastText
from requests import HTTPError
from urllib3.exceptions import MaxRetryError

from .bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix
from .errors.server_error import ServerError

BASE_URL = "https://graal.ift.ulaval.ca/public/deepparse/{}.{}"
Expand Down Expand Up @@ -127,7 +127,7 @@ def download_model(
elif model_type == "fasttext-light":
download_fasttext_magnitude_embeddings(cache_dir=saving_cache_path)
elif "bpemb" in model_type:
BPEmb(
BPEmbBaseURLWrapperBugFix(
lang="multi", vs=100000, dim=300, cache_dir=saving_cache_path
) # The class manages the download of the pretrained word embeddings

Expand Down
8 changes: 1 addition & 7 deletions deepparse/embeddings_models/bpemb_embeddings_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,13 @@
from pathlib import Path

import requests
from bpemb import BPEmb
from numpy.core.multiarray import ndarray
from urllib3.exceptions import InsecureRequestWarning

from ..bpemb_url_bug_fix import BPEmbBaseURLWrapperBugFix
from .embeddings_model import EmbeddingsModel


class BPEmbBaseURLWrapperBugFix(BPEmb):
def __init__(self, **kwargs):
self.base_url = "https://bpemb.h-its.org/multi/"
super().__init__(**kwargs)


class BPEmbEmbeddingsModel(EmbeddingsModel):
"""
BPEmb embeddings network from `BPEmb: Tokenization-free Pre-trained Subword Embeddings in 275 Languages
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
torch
bpemb
numpy<2.0.0
scipy<=1.10.1
scipy
requests
pymagnitude-light
colorama>=0.4.3
poutyne
gensim>=4.2.0
gensim>=4.3.3
fasttext-wheel
pandas
urllib3
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[metadata]
description-file = README.md
description_file = README.md

[options.entry_points]
console_scripts =
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,8 @@ def main():
"numpy<2.0.0",
"torch",
"bpemb",
"scipy<=1.10.1",
"gensim>=4.0.0",
"scipy",
"gensim>=4.3.3",
"requests",
"fasttext-wheel",
"pymagnitude-light",
Expand All @@ -90,7 +90,7 @@ def main():
"colorama": "colorama",
"app": ["fastapi[all]>=0.109.1", "uvicorn==0.22.0", "sentry-sdk[fastapi]>=2.0.0", "python-decouple==3.8"],
"all": [
"colorama>", # colorama
"colorama", # colorama
"fastapi[all]>=0.109.1", # app requirements
"uvicorn==0.22.0",
"sentry-sdk[fastapi]>=2.0.0",
Expand Down
12 changes: 6 additions & 6 deletions tests/cli/test_download_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi
@patch("deepparse.download_tools.download_weights")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.BPEmb") as downloader:
with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader:
download_model_cli_main([self.a_bpemb_model_type])

downloader.assert_called()
Expand Down Expand Up @@ -95,7 +95,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand All @@ -104,7 +104,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl
downloader.assert_called()
downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version
def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights(
Expand Down Expand Up @@ -179,7 +179,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting

downloader.assert_not_called()

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=True) # the latest version
def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting(
Expand Down Expand Up @@ -218,7 +218,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local
def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights(
self, download_embeddings_mock, os_is_file_mock
Expand Down
2 changes: 1 addition & 1 deletion tests/cli/test_download_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def tearDown(self) -> None:
@patch("deepparse.download_tools.download_weights")
@patch("deepparse.download_tools.download_fasttext_embeddings")
@patch("deepparse.download_tools.download_fasttext_magnitude_embeddings")
@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels(
self,
BPEmb_mock,
Expand Down
14 changes: 7 additions & 7 deletions tests/test_download_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def tearDown(self) -> None:
@patch("deepparse.download_tools.download_weights")
@patch("deepparse.download_tools.download_fasttext_embeddings")
@patch("deepparse.download_tools.download_fasttext_magnitude_embeddings")
@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenADownloadAllModels_whenModelsAreNotLocal_thenDownloadAllModels(
self,
BPEmb_mock,
Expand Down Expand Up @@ -428,7 +428,7 @@ def test_givenAFasttextMagnitudeDownload_whenModelIsNotLocal_thenDownloadEmbeddi
@patch("deepparse.download_tools.download_weights")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadEmbeddings(self, weights_download_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.BPEmb") as downloader:
with patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix") as downloader:
download_model(self.a_bpemb_model_type)

downloader.assert_called()
Expand Down Expand Up @@ -463,7 +463,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocal_thenDownloadWeights(sel
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand All @@ -472,7 +472,7 @@ def test_givenABPembDownload_whenModelIsNotLocal_thenDownloadWeights(self, downl
downloader.assert_called()
downloader.assert_any_call(self.a_bpemb_model_type, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
def test_givenABPembAttDownload_whenModelIsNotLocal_thenDownloadWeights(self, download_embeddings_mock):
with patch("deepparse.download_tools.CACHE_PATH", self.fake_cache_dir):
with patch("deepparse.download_tools.download_weights") as downloader:
Expand Down Expand Up @@ -507,7 +507,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalButNotLatest_thenDownloadWe
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=False) # not the latest version
def test_givenABPembDownload_whenModelIsLocalButNotLatest_thenDownloadWeights(
Expand Down Expand Up @@ -547,7 +547,7 @@ def test_givenAFasttextLightDownload_whenModelIsLocalAndGoodVersion_thenDoNoting

downloader.assert_not_called()

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", return_value=True)
@patch("deepparse.download_tools.latest_version", return_value=True) # the latest version
def test_givenABPembDownload_whenModelIsLocalAndGoodVersion_thenDoNoting(
Expand Down Expand Up @@ -586,7 +586,7 @@ def test_givenAFasttextLightDownload_whenModelIsNotLocalButNotLatest_thenDownloa
downloader.assert_called()
downloader.assert_any_call(self.a_fasttext_light_model_file_name, saving_dir=self.fake_cache_dir)

@patch("deepparse.download_tools.BPEmb")
@patch("deepparse.download_tools.BPEmbBaseURLWrapperBugFix")
@patch("deepparse.download_tools.os.path.isfile", side_effect=[False, True]) # no version file in local
def test_givenABPembDownload_whenModelIsNotLocalButNotLatest_thenDownloadWeights(
self, download_embeddings_mock, os_is_file_mock
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.11
0.9.13

0 comments on commit 51029ac

Please sign in to comment.