Skip to content

Commit

Permalink
v1.0.0 add
Browse files Browse the repository at this point in the history
  • Loading branch information
FortiShield committed Sep 7, 2024
1 parent dd784ef commit e1fb5f9
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 47 deletions.
10 changes: 5 additions & 5 deletions bengalinlp/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,27 @@ class ModelInfo:
"FASTTEXT": {
"name": "bengali_fasttext_wiki.bin",
"type": "zip",
"url": "https://huggingface.co/khulnasoft/bangla-fasttext/resolve/main/bengali_fasttext_wiki.zip",
"url": "https://huggingface.co/sagorsarker/bangla-fasttext/resolve/main/bengali_fasttext_wiki.zip",
},
"GLOVE": {
"name": "bn_glove.39M.100d.txt",
"type": "zip",
"url": "https://huggingface.co/khulnasoft/bangla-glove-vectors/resolve/main/bn_glove.39M.100d.zip",
"url": "https://huggingface.co/sagorsarker/bangla-glove-vectors/resolve/main/bn_glove.39M.100d.zip",
},
"NEWS_DOC2VEC": {
"name": "bangla_news_article_doc2vec.model",
"type": "zip",
"url": "https://huggingface.co/khulnasoft/news_article_doc2vec/resolve/main/news_article_doc2vec.zip",
"url": "https://huggingface.co/sagorsarker/news_article_doc2vec/resolve/main/news_article_doc2vec.zip",
},
"WIKI_DOC2VEC": {
"name": "bnwiki_doc2vec.model",
"type": "zip",
"url": "https://huggingface.co/khulnasoft/bnwiki_doc2vec_model/resolve/main/bnwiki_doc2vec_model.zip",
"url": "https://huggingface.co/sagorsarker/bnwiki_doc2vec_model/resolve/main/bnwiki_doc2vec_model.zip",
},
"WORD2VEC": {
"name": "bnwiki_word2vec.model",
"type": "zip",
"url": "https://huggingface.co/khulnasoft/bangla_word2vec/resolve/main/bangla_word2vec_gen4.zip",
"url": "https://huggingface.co/sagorsarker/bangla_word2vec/resolve/main/bangla_word2vec_gen4.zip",
},
}

Expand Down
80 changes: 38 additions & 42 deletions bengalinlp/utils/downloader.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,84 @@
"""Module providing functions for downloading models."""
"""Module providing Function for downloading models."""

import os
import shutil
from typing import Optional, Union, Tuple
from zipfile import ZipFile
from urllib.parse import urlparse
import requests
from tqdm.auto import tqdm
from bengalinlp import ZipFile

from bengalinlp.utils.config import ModelInfo


def _create_dirs(model_name: str) -> str:
"""Create directories for downloading models.
"""Create directories for downloading models
Args:
model_name (str): Name of the model.
model_name (str): Name of the model
Returns:
str: Absolute path where the model can be downloaded.
str: Absolute path where model can be downloaded
"""
model_dir = os.path.join(os.path.expanduser("~"), "bengalinlp", "models")
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, model_name)
return model_path


def _unzip_file(zip_file_path: str, unzip_dir: str = "") -> None:
    """Extract a .zip archive into a single flat directory.

    Each file member is written directly into ``unzip_dir`` under its base
    name; directory entries are skipped and any folder structure inside the
    archive is discarded. The archive is deleted after a successful
    extraction.

    Args:
        zip_file_path (str): Path of archive to be extracted
        unzip_dir (str, optional): Directory where archive will be extracted.
            Defaults to "", meaning the directory that contains the archive.

    Raises:
        Exception: Any error raised while reading, extracting or deleting the
            archive is re-raised after the cleanup below has run.
    """
    if not unzip_dir:
        # A bare file name has an empty dirname; "." keeps listdir() usable
        unzip_dir = os.path.dirname(zip_file_path) or "."

    op_desc = f"Extracting: {os.path.basename(zip_file_path)}"
    try:
        with ZipFile(file=zip_file_path) as zip_file:
            for member_name in tqdm(zip_file.namelist(), desc=op_desc):
                file_name = os.path.basename(member_name)
                if not file_name:
                    # Directory entries end with "/" and have no base name
                    continue
                target_path = os.path.join(unzip_dir, file_name)
                # Open the member first so a bad member never leaves an
                # empty target file or a leaked handle behind
                with zip_file.open(member_name) as source_file, open(
                    target_path, "wb"
                ) as target_file:
                    shutil.copyfileobj(source_file, target_file)
        # Remove the archive only once it is closed and fully extracted
        os.remove(zip_file_path)
    except Exception:
        # Remove files named after the archive (the archive itself and any
        # partially extracted output) so a later call starts clean
        zip_file_str = os.path.splitext(os.path.basename(zip_file_path))[0]
        if os.path.isdir(unzip_dir):
            for file_name in os.listdir(unzip_dir):
                candidate = os.path.join(unzip_dir, file_name)
                # Skip directories: os.remove would raise and hide the
                # original error
                if zip_file_str in file_name and os.path.isfile(candidate):
                    os.remove(candidate)
        raise


def _download_file(file_url: str, file_path: str) -> str:
"""Download a file from a URL.
"""Function to download file
Args:
file_url (str): URL of the file.
file_path (str): Path where the file will be downloaded.
Returns:
str: Path where the file is downloaded.
file_url (str): URL of the file
file_path (str): Path where file will be downloaded
Raises:
network_error: Download related error.
network_error: Download related error
Returns:
str: Path where the file is downloaded
"""
if os.path.exists(file_path):
return file_path

op_desc = f"Downloading {os.path.basename(file_path)}"
try:
with requests.Session() as req_sess:
req_res = req_sess.get(file_url, stream=True)
total_length = int(req_res.headers.get("Content-Length", 0))
total_length = int(req_res.headers.get("Content-Length"))
with tqdm.wrapattr(
req_res.raw, "read", total=total_length, desc=op_desc
) as raw:
Expand All @@ -93,18 +92,17 @@ def _download_file(file_url: str, file_path: str) -> str:


def _download_zip_model(model_url: str, model_path: str) -> str:
"""Download and extract a model archive.
"""Download and extract model archive and return extracted path.
Args:
model_url (str): URL of the model.
model_path (str): Path where the model will be downloaded.
model_url (str): URL of the model
model_path (str): Path where model will be downloaded
Returns:
str: Path where the model is extracted after downloading.
str: Path where model is extracted after downloading
"""
if os.path.exists(model_path):
return model_path

extract_dir = os.path.dirname(model_path)
url_model_name = os.path.basename(urlparse(model_url).path)
tmp_zip_file_path = os.path.join(extract_dir, url_model_name)
Expand All @@ -114,30 +112,28 @@ def _download_zip_model(model_url: str, model_path: str) -> str:


def download_model(name: str) -> str:
    """Download and extract model if necessary

    Args:
        name (str): Model key passed to ``ModelInfo.get_model_info``

    Returns:
        str: Path returned by the matching download helper, or "" when the
            model type is not supported
    """
    model_name, model_type, model_url = ModelInfo.get_model_info(name)
    model_path = _create_dirs(model_name)

    if model_type == "single":
        return _download_file(model_url, model_path)
    if model_type == "zip":
        return _download_zip_model(model_url, model_path)

    # Unknown types are reported, not raised; callers receive an empty path
    print(f"model type {model_type} not yet implemented")
    return ""


def download_all_models() -> None:
    """Run download_model for every key returned by ModelInfo.get_all_models()"""
    for model_key in ModelInfo.get_all_models():
        download_model(model_key)

0 comments on commit e1fb5f9

Please sign in to comment.