v1.0.0 add

banglawiki · Sep 7, 2024 · e1fb5f9 · e1fb5f9
1 parent dd784ef
commit e1fb5f9
Show file tree

Hide file tree

Showing 2 changed files with 43 additions and 47 deletions.
diff --git a/bengalinlp/utils/config.py b/bengalinlp/utils/config.py
@@ -34,27 +34,27 @@ class ModelInfo:
         "FASTTEXT": {
             "name": "bengali_fasttext_wiki.bin",
             "type": "zip",
-            "url": "https://huggingface.co/khulnasoft/bangla-fasttext/resolve/main/bengali_fasttext_wiki.zip",
+            "url": "https://huggingface.co/sagorsarker/bangla-fasttext/resolve/main/bengali_fasttext_wiki.zip",
         },
         "GLOVE": {
             "name": "bn_glove.39M.100d.txt",
             "type": "zip",
-            "url": "https://huggingface.co/khulnasoft/bangla-glove-vectors/resolve/main/bn_glove.39M.100d.zip",
+            "url": "https://huggingface.co/sagorsarker/bangla-glove-vectors/resolve/main/bn_glove.39M.100d.zip",
         },
         "NEWS_DOC2VEC": {
             "name": "bangla_news_article_doc2vec.model",
             "type": "zip",
-            "url": "https://huggingface.co/khulnasoft/news_article_doc2vec/resolve/main/news_article_doc2vec.zip",
+            "url": "https://huggingface.co/sagorsarker/news_article_doc2vec/resolve/main/news_article_doc2vec.zip",
         },
         "WIKI_DOC2VEC": {
             "name": "bnwiki_doc2vec.model",
             "type": "zip",
-            "url": "https://huggingface.co/khulnasoft/bnwiki_doc2vec_model/resolve/main/bnwiki_doc2vec_model.zip",
+            "url": "https://huggingface.co/sagorsarker/bnwiki_doc2vec_model/resolve/main/bnwiki_doc2vec_model.zip",
         },
         "WORD2VEC": {
             "name": "bnwiki_word2vec.model",
             "type": "zip",
-            "url": "https://huggingface.co/khulnasoft/bangla_word2vec/resolve/main/bangla_word2vec_gen4.zip",
+            "url": "https://huggingface.co/sagorsarker/bangla_word2vec/resolve/main/bangla_word2vec_gen4.zip",
         },
     }
 

diff --git a/bengalinlp/utils/downloader.py b/bengalinlp/utils/downloader.py
@@ -1,85 +1,84 @@
-"""Module providing functions for downloading models."""
+"""Module providing helper functions for downloading models."""
 
 import os
 import shutil
-from typing import Optional, Union, Tuple
+from zipfile import ZipFile
 from urllib.parse import urlparse
 import requests
 from tqdm.auto import tqdm
-from bengalinlp import ZipFile
+
 from bengalinlp.utils.config import ModelInfo
 
 
 def _create_dirs(model_name: str) -> str:
-    """Create directories for downloading models.
+    """Create directories for downloading models
 
     Args:
-        model_name (str): Name of the model.
+        model_name (str): Name of the model
 
     Returns:
-        str: Absolute path where the model can be downloaded.
+        str: Absolute path where model can be downloaded
     """
     model_dir = os.path.join(os.path.expanduser("~"), "bengalinlp", "models")
     os.makedirs(model_dir, exist_ok=True)
     model_path = os.path.join(model_dir, model_name)
     return model_path
 
 
-def _unzip_file(zip_file_path: str, unzip_dir: Optional[str] = None) -> None:
-    """Extract a .zip archive.
+def _unzip_file(zip_file_path: str, unzip_dir: str = "") -> None:
+    """Function to extract archives in .zip format
 
     Args:
-        zip_file_path (str): Path of the archive to be extracted.
-        unzip_dir (Optional[str]): Directory where the archive will be extracted. Defaults to None, which means the same directory as the zip file.
+        zip_file_path (str): Path of archive to be extracted
+        unzip_dir (str, optional): Directory where archive will be extracted. Defaults to "".
 
     Raises:
-        zip_error: Error from ZipFile module.
+        zip_error: Error from ZipFile module
     """
-    if unzip_dir is None:
+    if not unzip_dir:
         unzip_dir = os.path.dirname(zip_file_path)
-
     op_desc = f"Extracting: {os.path.basename(zip_file_path)}"
     try:
         with ZipFile(file=zip_file_path) as zip_file:
             for member_name in tqdm(zip_file.namelist(), desc=op_desc):
                 file_name = os.path.basename(member_name)
-                if file_name:
-                    target_path = os.path.join(unzip_dir, file_name)
-                    with zip_file.open(member_name) as source_file, open(
-                        target_path, "wb"
-                    ) as target_file:
-                        shutil.copyfileobj(source_file, target_file)
+                if not file_name:
+                    continue
+                target_path = os.path.join(unzip_dir, file_name)
+                target_path = open(target_path, "wb")
+                source_file = zip_file.open(member_name)
+                with source_file, target_path:
+                    shutil.copyfileobj(source_file, target_path)
         os.remove(zip_file_path)
     except Exception as zip_error:
-        # Clean up any partial extraction
-        zip_file_str = os.path.splitext(os.path.basename(zip_file_path))[0]
+        zip_file_str = os.path.basename(zip_file_path)
+        zip_file_str = os.path.splitext(zip_file_str)[0]
         for file_name in os.listdir(unzip_dir):
             if zip_file_str in file_name:
                 os.remove(os.path.join(unzip_dir, file_name))
         raise zip_error
 
 
 def _download_file(file_url: str, file_path: str) -> str:
-    """Download a file from a URL.
+    """Function to download file
 
     Args:
-        file_url (str): URL of the file.
-        file_path (str): Path where the file will be downloaded.
-
-    Returns:
-        str: Path where the file is downloaded.
+        file_url (str): URL of the file
+        file_path (str): Path where file will be downloaded
 
     Raises:
-        network_error: Download related error.
+        network_error: Download related error
+
+    Returns:
+        str: Path where the file is downloaded
     """
     if os.path.exists(file_path):
         return file_path
-
     op_desc = f"Downloading {os.path.basename(file_path)}"
     try:
         with requests.Session() as req_sess:
             req_res = req_sess.get(file_url, stream=True)
-            total_length = int(req_res.headers.get("Content-Length", 0))
+            total_length = int(req_res.headers.get("Content-Length"))
             with tqdm.wrapattr(
                 req_res.raw, "read", total=total_length, desc=op_desc
             ) as raw:
@@ -93,18 +92,17 @@ def _download_file(file_url: str, file_path: str) -> str:
 
 
 def _download_zip_model(model_url: str, model_path: str) -> str:
-    """Download and extract a model archive.
+    """Download and extract model archive and return extracted path.
 
     Args:
-        model_url (str): URL of the model.
-        model_path (str): Path where the model will be downloaded.
+        model_url (str): URL of the model
+        model_path (str): Path where model will be downloaded
 
     Returns:
-        str: Path where the model is extracted after downloading.
+        str: Path where model is extracted after downloading
     """
     if os.path.exists(model_path):
         return model_path
-
     extract_dir = os.path.dirname(model_path)
     url_model_name = os.path.basename(urlparse(model_url).path)
     tmp_zip_file_path = os.path.join(extract_dir, url_model_name)
@@ -114,30 +112,28 @@ def _download_zip_model(model_url: str, model_path: str) -> str:
 
 
 def download_model(name: str) -> str:
-    """Download and extract a model if necessary.
+    """Download and extract model if necessary
 
     Args:
-        name (str): Name of the model.
+        name (str): Name of the model, as looked up via ModelInfo.get_model_info
 
     Returns:
-        str: Path where the model is downloaded or extracted.
+        str: Path where the model is downloaded or extracted, or "" if the model type is not implemented
     """
     model_name, model_type, model_url = ModelInfo.get_model_info(name)
     model_path = _create_dirs(model_name)
-
     if model_type == "single":
         model_path = _download_file(model_url, model_path)
     elif model_type == "zip":
         model_path = _download_zip_model(model_url, model_path)
     else:
-        print(f"Model type {model_type} not yet implemented")
+        print(f"model type {model_type} not yet implemented")
         model_path = ""
-
     return model_path
 
 
 def download_all_models() -> None:
-    """Download and extract all available models for BengaliNLP."""
+    """Download and extract all available models for BengaliNLP"""
     model_keys = ModelInfo.get_all_models()
     for model_key in model_keys:
         download_model(model_key)