Skip to content

Commit

Permalink
Merge branch 'pub-dev' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
shivishbrahma committed Mar 24, 2021
2 parents c9b94b0 + a6494d0 commit 389bac2
Show file tree
Hide file tree
Showing 85 changed files with 41,180 additions and 8 deletions.
4 changes: 4 additions & 0 deletions .flaskenv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
FLASK_DEBUG=True
FLASK_ENV=development
# FLASK_APP=app
FLASK_RUN_PORT=5454
35 changes: 27 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

# Created by https://www.toptal.com/developers/gitignore/api/flask,vscode,vue,python
# Edit at https://www.toptal.com/developers/gitignore?templates=flask,vscode,vue,python
# Created by https://www.toptal.com/developers/gitignore/api/flask,python,vscode,vue,vuejs
# Edit at https://www.toptal.com/developers/gitignore?templates=flask,python,vscode,vue,vuejs

### Flask ###
instance/*
Expand All @@ -24,8 +24,6 @@ dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
Expand Down Expand Up @@ -102,6 +100,9 @@ ipython_config.py
# install all needed dependencies.
#Pipfile.lock

# poetry
#poetry.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

Expand All @@ -113,8 +114,9 @@ celerybeat.pid
*.sage.py

# Environments
.env
.venv
# .env
.env/
.venv/
env/
venv/
ENV/
Expand Down Expand Up @@ -143,9 +145,14 @@ dmypy.json
# pytype static type analyzer
.pytype/

# operating system-related files
# file properties cache/storage on macOS
*.DS_Store
# thumbnail cache on Windows
Thumbs.db

# profiling data
.prof


### Python ###
# Byte-compiled / optimized / DLL files

Expand Down Expand Up @@ -185,13 +192,16 @@ dmypy.json
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.

# poetry

# PEP 582; used by e.g. github.com/David-OConnor/pyflow

# Celery stuff

# SageMath parsed files

# Environments
# .env

# Spyder project settings

Expand All @@ -205,8 +215,11 @@ dmypy.json

# pytype static type analyzer

# operating system-related files

# profiling data


### vscode ###
.vscode/*
!.vscode/settings.json
Expand All @@ -226,7 +239,13 @@ docs/_book
# TODO: where does this rule come from?
test/

# End of https://www.toptal.com/developers/gitignore/api/flask,vscode,vue,python
node_modules
### Vuejs ###
# Recommended template: Node.gitignore

node_modules/
npm-debug.log
yarn-error.log

# End of https://www.toptal.com/developers/gitignore/api/flask,python,vscode,vue,vuejs

*.csv
4 changes: 4 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python.formatting.provider": "yapf",
"python.pythonPath": "/home/anitesh/anaconda3/envs/awesome/bin/python"
}
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
run_server:
python server/index.py
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Moraliser

A simple chat application with adult-content and slang filters.

## Tags

- "Lojja korena"
- "Ainai giye bol beta"
- "Bolis kire"
- "Abar bol"
- "Pitiye debo"
- "Oslil"
- "Eh"
- "Yeaman"

## UI Design

<https://www.behance.net/gallery/93164379/Social-Concept-Application>
16 changes: 16 additions & 0 deletions app/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import os
from flask import Flask, current_app, send_file
from .api import api_bp

# Create the Flask application object; its static folder is set to
# '../dist/static'.
app = Flask(__name__, static_folder='../dist/static')
# Mount the API blueprint imported from .api at the top of this module.
app.register_blueprint(api_bp)

# Deliberately imported only after `app` exists: app/config.py itself does
# `from app import app`, so importing it any earlier would be circular.
from .config import Config
# app.logger.info('>>> {}'.format(Config.FLASK_ENV))


@app.route('/')
def index_client():
    """Serve the client's index.html from the configured DIST_DIR."""
    index_path = os.path.join(current_app.config['DIST_DIR'], 'index.html')
    return send_file(index_path)
8 changes: 8 additions & 0 deletions app/api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from flask_restful import Api
from flask import Blueprint

# Blueprint that groups the API views under the '/api' URL prefix.
api_bp = Blueprint('api_bp', __name__, url_prefix='/api')
# flask_restful wrapper bound to the same blueprint.
api_rest = Api(api_bp)

# Imported last on purpose: both route modules do `from . import api_bp`,
# so the blueprint must already exist; importing them runs their
# @api_bp.route decorators, which attaches the views to the blueprint.
from .user_route import *
from .chat_route import *
75 changes: 75 additions & 0 deletions app/api/chat_route.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from flask import request, jsonify
from . import api_bp
import pickle
import re


class TextSpamClassifier():
    """Score chat text with a pickled spam model and its vectorizer.

    Both artefacts are unpickled once, when the instance is created.
    """

    def __init__(
            self,
            model_path='notebooks/spam_filter/model_spam.pickle',
            vectorizer_path='notebooks/spam_filter/vectorizer_spam.pickle'
    ) -> None:
        """Load the fitted model and vectorizer from disk.

        Args:
            model_path: Pickle file holding the fitted model. The default is
                a relative path, so it is resolved against the process's
                current working directory.
            vectorizer_path: Pickle file holding the fitted vectorizer;
                resolved the same way.

        Raises:
            OSError: If either file cannot be opened.
        """
        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file. Only ever point these paths at trusted artefacts.
        with open(model_path, 'rb') as handle:
            self.model = pickle.load(handle)
        with open(vectorizer_path, 'rb') as handle:
            self.vectorizer = pickle.load(handle)

    def __preprocess_text(self, text):
        """Lower-case ``text`` and reduce it to space-separated ASCII
        alphanumeric tokens.

        Every character outside ``[a-zA-Z0-9]`` is treated as a separator.
        """
        text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())
        # split() without a separator already trims the ends and never
        # yields empty tokens, so no extra strip/filter step is needed.
        return ' '.join(text.split())

    def predict_proba(self, X):
        """Return the model's probability for class column 1 for one text.

        Args:
            X: A single message string.

        Returns:
            ``self.model.predict_proba(...)[0][1]`` for the vectorized
            message (column 1 is presumably the "spam" class -- confirm
            against the training notebook).
        """
        val = self.__preprocess_text(X)
        val = self.vectorizer.transform([val])
        return self.model.predict_proba(val)[0][1]


class TextProfanityClassifier():
    """Classify chat text with a pickled profanity model and its vectorizer.

    Both artefacts are unpickled once, when the instance is created.
    """

    def __init__(
            self,
            model_path='notebooks/profanity_filter/model_profanity.pickle',
            vectorizer_path='notebooks/profanity_filter/vectorizer_profanity.pickle'
    ) -> None:
        """Load the fitted model and vectorizer from disk.

        Args:
            model_path: Pickle file holding the fitted model. The default is
                a relative path, so it is resolved against the process's
                current working directory.
            vectorizer_path: Pickle file holding the fitted vectorizer;
                resolved the same way.

        Raises:
            OSError: If either file cannot be opened.
        """
        # SECURITY: pickle.load can execute arbitrary code contained in the
        # file. Only ever point these paths at trusted artefacts.
        with open(model_path, 'rb') as handle:
            self.model = pickle.load(handle)
        with open(vectorizer_path, 'rb') as handle:
            self.vectorizer = pickle.load(handle)

    def __preprocess_text(self, text):
        """Lower-case ``text`` and reduce it to space-separated ASCII
        alphanumeric tokens.

        Every character outside ``[a-zA-Z0-9]`` is treated as a separator.
        """
        text = re.sub(r"[^a-zA-Z0-9]", " ", text.lower())
        # split() without a separator already trims the ends and never
        # yields empty tokens, so no extra strip/filter step is needed.
        return ' '.join(text.split())

    def predict_proba(self, X):
        """Return the model's prediction for one text.

        NOTE: despite the method name, this calls ``self.model.predict`` and
        returns its first element (the predicted label), not a probability.
        The name is kept so existing callers continue to work.

        Args:
            X: A single message string.

        Returns:
            ``self.model.predict(...)[0]`` for the vectorized message.
        """
        val = self.__preprocess_text(X)
        val = self.vectorizer.transform([val])
        return self.model.predict(val)[0]


# Classifier instances shared by all requests. They are filled in on first
# use rather than at import time, so importing this module never touches the
# pickle files. Changes to those files are picked up on process restart.
_classifiers = {}


def _get_classifiers():
    """Return the shared (spam, profanity) classifiers, loading them once."""
    if not _classifiers:
        # Build both before storing either, so a failed load never leaves a
        # half-populated cache behind.
        spam = TextSpamClassifier()
        profanity = TextProfanityClassifier()
        _classifiers.update(spam=spam, profanity=profanity)
    return _classifiers['spam'], _classifiers['profanity']


def _spam_label(prob):
    """Map a spam score to the label shown to the user."""
    if prob > 0.8:
        return "Highly spam!"
    if prob > 0.6:
        return "Slightly spam!"
    if prob > 0.4:
        return "Less spam!"
    if prob > 0.2:
        return "I don't think spam!"
    return "Not a spam!"


@api_bp.route('/text-validate', methods=['POST'])
def text_chat_validate():
    """Rate the posted ``message`` form field for profanity and spam.

    Returns:
        A JSON response with ``spam_text``, ``prof_text`` and
        ``status: "success"``. If the ``message`` field is absent, a JSON
        error body with HTTP status 400 is returned instead.
    """
    message = request.form.get('message')
    if message is None:
        # Without this guard the preprocessing step fails on None.lower().
        return jsonify({
            "msg": "Missing 'message' form field.",
            "status": "error"
        }), 400

    tsc, tpc = _get_classifiers()

    prof_text = "No profane!"
    if tpc.predict_proba(message) != 0:
        prof_text = "Highly profane!"

    # Score the message once and reuse the value for every threshold.
    spam_text = _spam_label(tsc.predict_proba(message))

    return jsonify({
        "spam_text": spam_text,
        "prof_text": prof_text,
        "status": "success"
    })
21 changes: 21 additions & 0 deletions app/api/user_route.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from flask import request, jsonify
from . import api_bp


@api_bp.route('/login', methods=['POST'])
def user_login():
    """Handle POST /login and answer with a fixed success payload."""
    form = request.form
    # NOTE(review): the credentials are read but never checked, so every
    # request is reported as a successful login.
    username = form.get('username')
    password = form.get('password')
    payload = {"msg": "Logged in successfully!", "status": "success"}
    return jsonify(payload)


@api_bp.route('/register', methods=['POST'])
def user_register():
    """Handle POST /register and answer with a fixed success payload."""
    form = request.form
    # NOTE(review): the submitted fields are read but not validated or
    # stored anywhere in this function.
    email = form.get('email')
    username = form.get('username')
    password = form.get('password')
    payload = {"msg": "Registered successfully!", "status": "success"}
    return jsonify(payload)

@api_bp.route('/logout', methods=['POST'])
def user_logout():
    """Handle POST /logout and answer with a fixed success payload."""
    payload = {"msg": "Logged out successfully!", "status": "success"}
    return jsonify(payload)
25 changes: 25 additions & 0 deletions app/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Global Flask Application Setting
See `.flaskenv` for default settings.
"""

import os
from app import app


class Config(object):
    """Flask settings container, loaded through ``app.config.from_object``.

    The class body runs once at import time.

    Raises:
        FileNotFoundError: At import time, if ``DIST_DIR`` does not exist.
    """
    # If not set fall back to production for safety
    FLASK_ENV = os.getenv('FLASK_ENV', 'production')
    # Set FLASK_SECRET on your production Environment
    # NOTE(review): the 'Secret' fallback is a fixed, publicly visible value;
    # it is used whenever FLASK_SECRET is unset.
    SECRET_KEY = os.getenv('FLASK_SECRET', 'Secret')

    # Directory of this module, its parent, and the 'dist' folder under it.
    APP_DIR = os.path.dirname(__file__)
    ROOT_DIR = os.path.dirname(APP_DIR)
    DIST_DIR = os.path.join(ROOT_DIR, 'dist')

    # Fail fast with a specific exception type when 'dist' is missing.
    # FileNotFoundError is still an Exception, so broad handlers keep working.
    if not os.path.exists(DIST_DIR):
        raise FileNotFoundError('DIST_DIR not found: {}'.format(DIST_DIR))


# Apply the Config class defined in this module to the Flask app, referring
# to it by its import path.
app.config.from_object('app.config.Config')
Binary file not shown.
109 changes: 109 additions & 0 deletions notebooks/profanity_filter/profanity_filter.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
"metadata": {
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python3",
"display_name": "Python 3.8.3 64-bit ('base': conda)",
"metadata": {
"interpreter": {
"hash": "bcf6d6f1e0171d0d91abd06fa98fe319d27d921fb762534eb067c2306250a62d"
}
}
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"source": [
"# Profanity Filter"
],
"cell_type": "markdown",
"metadata": {}
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"--2021-03-25 00:41:49-- https://github.com/vzhou842/profanity-check/raw/master/profanity_check/data/clean_data.csv\n",
"Resolving github.com (github.com)... 13.234.210.38\n",
"Connecting to github.com (github.com)|13.234.210.38|:443... connected.\n",
"HTTP request sent, awaiting response... 302 Found\n",
"Location: https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv [following]\n",
"--2021-03-25 00:41:50-- https://raw.githubusercontent.com/vzhou842/profanity-check/master/profanity_check/data/clean_data.csv\n",
"Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...\n",
"Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.\n",
"HTTP request sent, awaiting response... 200 OK\n",
"Length: 65535263 (62M) [text/plain]\n",
"Saving to: ‘clean_data.csv’\n",
"\n",
"clean_data.csv 100%[===================>] 62.50M 836KB/s in 85s \n",
"\n",
"2021-03-25 00:43:21 (753 KB/s) - ‘clean_data.csv’ saved [65535263/65535263]\n",
"\n"
]
}
],
"source": [
"# download datasets\n",
"# !wget https://github.com/vzhou842/profanity-check/raw/master/profanity_check/data/clean_data.csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.calibration import CalibratedClassifierCV\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.externals import joblib\n",
"import pickle\n",
"\n",
"# Read in data\n",
"data = pd.read_csv('clean_data.csv')\n",
"texts = data['text'].astype(str)\n",
"y = data['is_offensive']\n",
"\n",
"# Vectorize the text\n",
"vectorizer = CountVectorizer(stop_words='english', min_df=0.0001)\n",
"X = vectorizer.fit_transform(texts)\n",
"\n",
"# Train the model\n",
"# model = LinearSVC(class_weight=\"balanced\", dual=False, tol=1e-2, max_iter=1e5)\n",
"model = LinearSVC()\n",
"#cclf = CalibratedClassifierCV(base_estimator=model)\n",
"# cclf.fit(X, y)\n",
"model.fit(X,y)\n",
"\n",
"# Save the model\n",
"# joblib.dump(vectorizer, 'vectorizer.joblib')\n",
"# joblib.dump(cclf, 'model.joblib')\n",
"with open('vectorizer_profanity.pickle', 'wb') as f:\n",
" pickle.dump(vectorizer, f)\n",
"\n",
"with open('model_profanity.pickle', 'wb') as f:\n",
" pickle.dump(model, f)"
]
}
]
}
Binary file not shown.
Loading

0 comments on commit 389bac2

Please sign in to comment.