Merge pull request #1 from subashcs/develop

Develop
subashcs · Oct 2, 2023 · ae263de · ae263de
2 parents b6b972f + 8e449f2
commit ae263de
Show file tree

Hide file tree

Showing 9 changed files with 1,104 additions and 615 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+testenv
+env
+.DS_Store
+.vscode
+GoogleNews-vectors-negative300.bin.gz
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -1,8 +1,27 @@
-# AdCategorizationAPI
+# AdCategorization Model
 Categorizes ads based on text content
-
+## Tools Used
+  - Python 3.11.5
 ## Input 
  Text content
-
 ## Output
 Category of the text (post)
+
+## Training model locally
+1. Clone the repository
+2. Create a virtual environment and run:
+```
+pip install -r requirement.txt
+```
+3. Download the `GoogleNews-vectors-negative300.bin.gz` data file and place it in the root directory of the project.
+4. Run the `trainingModel.ipynb` notebook in Jupyter itself or in any editor that supports Jupyter notebooks.
+
+## Running the API 
+To run the API locally, start the Flask server (it listens on port 4000) with the command:
+```
+python main.py
+```
+
+## License
+A comparative study of text categorization for advertisement classification © 2019 by Subash Chandra Sapkota is licensed under CC BY 4.0. 
+To view a copy of this license, see the LICENSE file.
diff --git a/adsdata.xlsx b/adsdata.xlsx
diff --git a/adsdatanew.xlsx b/adsdatanew.xlsx
diff --git a/finalized_model.sav b/finalized_model.sav
diff --git a/Model.py → main.py b/Model.py → main.py
@@ -1,17 +1,14 @@
-from flask import Flask ,jsonify , request,json
+from flask import Flask, jsonify, request, json
 import pickle
-from nltk.corpus import stopwords
-
 import pandas as pd
 import numpy as np
-
 import nltk
-
 import re
+
+
 REPLACE_BY_SPACE_RE = re.compile('[/(){}\[\]\|@,;]')
 BAD_SYMBOLS_RE = re.compile('[^0-9a-z #+_]')
-
-STOPWORDS = set(stopwords.words('english'))
+STOPWORDS = set(nltk.corpus.stopwords.words('english'))
 
 #prepare text clean function
 
@@ -32,24 +29,25 @@ def clean_text(text):
 app = Flask(__name__)
 
 if __name__ == '__main__':
-    app.run(debug=True,host='0.0.0.0')
+    app.run(debug=True,host='0.0.0.0',port=4000)
 
 filename = 'finalized_model.sav'
 loaded_model = pickle.load(open(filename, 'rb'))
 
 @app.route('/')
 def hello_world():
-    return undefined
+    return None
 
 @app.route('/<content>')
 def show_user_profile(content):
-    # show the user profile for that user
-    str1 = ''.join(str(e) for e in content)
     ToPredict=clean_text(content)
     result = loaded_model.predict([ToPredict])
     res = ''.join(str(e) for e in result)
     return jsonify({"category":res})
 
+@app.route('/test')
+def testData():
+    return jsonify({"result":"success"})   
 
 @app.route('/classify',methods = ['POST'])
 def classify():
@@ -61,6 +59,4 @@ def classify():
     res = ''.join(str(e) for e in result)
     return jsonify({"category":res})
 
-@app.route('/test')
-def testData():
-    return jsonify({"result":"success"})    
+
diff --git a/requirement.txt b/requirement.txt
@@ -0,0 +1,13 @@
+gensim==4.3.2
+jupyter_client==8.3.1
+jupyter_core==5.3.2
+matplotlib==3.8.0
+nltk==3.8.1
+numpy==1.26.0
+openpyxl==3.1.2
+pandas==2.1.1
+pickleshare==0.7.5
+Pillow==10.0.1
+scikit-learn==1.3.1
+scipy==1.11.2
+Flask==2.3.3
diff --git a/trainingModel.ipynb b/trainingModel.ipynb