Skip to content

Commit

Permalink
Merge pull request #14 from kumparan/feature/update-model-and-fix-vulnerabilities-library
Browse files Browse the repository at this point in the history

feature/update model and fix vulnerabilities library
  • Loading branch information
zavliju authored Jul 4, 2024
2 parents be78f1f + 7e7de67 commit 2900653
Show file tree
Hide file tree
Showing 7 changed files with 1,330 additions and 42 deletions.
46 changes: 46 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# CI workflow: installs the project with Poetry, audits its dependencies,
# builds the package, installs the built wheel and runs the test suite.
name: python

# Triggered on pull requests.
on: [pull_request]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Setup Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      # Poetry is pinned so the lock file is resolved by a known version.
      - name: Install Poetry
        run: |
          pip install poetry==1.8.2

      # virtualenvs.create=false installs into the runner's interpreter, so the
      # later plain `python`, `pip` and `make test` calls see the dependencies.
      # The NLTK "punkt" resource is downloaded up front.
      - name: Install dependencies
        run: |
          poetry config virtualenvs.create false && poetry install
          python -m nltk.downloader punkt

      # Export the locked dependency set and audit it with pip-audit.
      - name: Vulnerabilities checker
        run: |
          poetry export --without-hashes --format=requirements.txt > requirements.txt
          poetry run pip-audit -r requirements.txt

      - name: Build package
        run: |
          poetry build

      # Lists the build artifacts; fails the job if dist/ was not produced.
      - name: Verify dist directory
        run: |
          ls -l dist

      # Install the wheel produced by the build step.
      - name: Install built package
        run: |
          pip install dist/*.whl

      - name: Run tests
        run: |
          make test
9 changes: 9 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Format the code base in place: sort imports with isort, then run black.
# Recipe lines must begin with a tab character.
run-formatter:
	isort .
	black .
.PHONY: run-formatter

# Run test
test:
	pytest
.PHONY: test
18 changes: 8 additions & 10 deletions nlp_id/postag.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import nltk
import os
import pickle
import warnings
import wget
import logging
from huggingface_hub import hf_hub_download
from nlp_id import tokenizer
from nltk.tree import Tree
from sklearn.ensemble import RandomForestClassifier
Expand All @@ -15,7 +15,8 @@ def __init__(self, model_path=None):
self.current_dir = os.path.dirname(os.path.realpath(__file__))
if not model_path:
folder_name = "data"
file_name = "postagger_v9.pkl"
repo_id = "kumparanai/postagger"
file_name = "postagger_v10.pkl"

folder_path = os.path.join(self.current_dir, folder_name)
model_path = os.path.join(folder_path, file_name)
Expand All @@ -33,14 +34,11 @@ def __init__(self, model_path=None):
if all_pickle:
for pickle_file in all_pickle:
os.remove(pickle_file)
print("Removed", pickle_file)
logging.info("Removed", pickle_file)
else:
print("No model removed")
warnings.warn("Downloading model ..")
url = "https://storage.googleapis.com/kumparan-public-bucket/nlp-id/{}".format(
file_name
)
wget.download(url, model_path)
logging.info("No model removed")
logging.info("Downloading model ..")
model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
self.clf = self.load_model(model_path)
self.tokenizer = tokenizer.Tokenizer()

Expand Down
1,235 changes: 1,235 additions & 0 deletions poetry.lock

Large diffs are not rendered by default.

32 changes: 32 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Package metadata and dependency pins for the nlp-id distribution (Poetry).
[tool.poetry]
name = "nlp-id"
version = "0.1.16.0"
description = "Kumparan's NLP Services"
authors = ["Zavli Juwantara <[email protected]>"]
repository = "https://github.com/kumparan/NLP-ID"
# Each entry uses the "Name <email>" form, with a space before the bracket.
maintainers = [
    "Frandy Eddy <[email protected]>",
    "Dhanang Hadhi Sasmita <[email protected]>",
    "Aslam Hadi H <[email protected]>",
    "Ghefira Nur Fatimah Widyasari <[email protected]>",
]
license = "MIT"
readme = "README.md"

# Runtime dependencies, pinned to exact versions.
[tool.poetry.dependencies]
python = ">=3.9,<3.12"
scikit-learn = "1.5.1"
nltk = "3.8.1"
# NOTE(review): nlp_id/postag.py stops importing wget in this change; confirm
# no other module still uses it before dropping this pin.
wget = "3.2"
huggingface-hub = "0.23.4"


# Development-only tooling: tests, vulnerability audit and formatters.
[tool.poetry.group.dev.dependencies]
pytest = "8.2.2"
pip-audit = "2.7.3"
black = "24.4.2"
isort = "5.13.2"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
2 changes: 0 additions & 2 deletions setup.cfg

This file was deleted.

30 changes: 0 additions & 30 deletions setup.py

This file was deleted.

0 comments on commit 2900653

Please sign in to comment.