-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add SentenceClassificationAPI for sentence classification tasks
Added a new API to handle sentence classification tasks (sentence_classification_api.py), which now handles the loading of the sentence classifiers and the sent2vec model, and processes POST requests for classifying sentences. Refactored curator_dashboard.py to make a POST request to this new API for sentence classifications. This allows for better separation of concerns, as sentence classification tasks have been abstracted out of the curator dashboard. This commit also includes the Dockerfile for the new API and changes to the old Dockerfile for the environment variable pointing to this new API.
- Loading branch information
Showing
5 changed files
with
134 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
FROM python:3.8-slim

WORKDIR /usr/src/app/
# COPY is preferred over ADD for plain local files (no URL/tar semantics needed).
COPY requirements.txt .
RUN pip3 install -r requirements.txt
# Pre-download the NLTK corpora the API needs so startup does not hit the network.
RUN python3 -c "import nltk; nltk.download('stopwords'); nltk.download('punkt')"
COPY src/backend/sentence_embedding_api src/backend/sentence_embedding_api

ENV PYTHONPATH=$PYTHONPATH:/usr/src/app/

# ${PORT} substitution in EXPOSE only works if PORT is declared; without this
# ARG/ENV pair the original EXPOSE expanded to an empty string. 8002 matches
# the port the API binds to by default.
ARG PORT=8002
ENV PORT=${PORT}

EXPOSE ${PORT}
CMD python3 src/backend/sentence_embedding_api/sentence_embedding_api.py >> /var/log/sentence_embedding_api.log 2>&1
107 changes: 107 additions & 0 deletions
107
src/backend/sentence_classification_api/sentence_classification_api.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
#!/usr/bin/env python3 | ||
import json | ||
import logging | ||
|
||
import joblib | ||
import sent2vec | ||
import falcon | ||
import os | ||
|
||
from wsgiref import simple_server | ||
from falcon import HTTPStatus | ||
|
||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class HandleCORS(object):
    """Falcon middleware that attaches permissive CORS headers to every response.

    Answers CORS preflight (OPTIONS) requests directly with 200 so browsers
    can issue cross-origin POSTs to the classification endpoint.
    """

    def process_request(self, req, resp):
        # Echo back whatever headers the client asked permission for, or
        # allow everything when the preflight did not name any.
        allow_headers = req.get_header(
            'Access-Control-Request-Headers',
            default='*'
        )
        resp.set_header('Access-Control-Allow-Origin', '*')
        resp.set_header('Access-Control-Allow-Methods', '*')
        resp.set_header('Access-Control-Allow-Headers', allow_headers)
        resp.set_header('Access-Control-Max-Age', 1728000)  # 20 days
        if req.method == 'OPTIONS':
            # Short-circuit the preflight. This file uses falcon.App (falcon 3+),
            # where HTTPStatus takes `text`; the falcon 2 `body` keyword used
            # originally raises TypeError on falcon 3.
            raise HTTPStatus(falcon.HTTP_200, text='\n')
|
||
|
||
class SentenceClassificationReader:
    """Falcon resource that classifies sentences with pre-trained models.

    On construction it loads six scikit-learn classifiers (one per
    category/aspect pair) plus a sent2vec embedding model from
    /var/sentence_classification_models/, then serves
    POST /api/sentence_classification/classify_sentences requests.
    """

    # Category -> aspect layout shared by the model files on disk
    # ("{aspect}_{category}.joblib") and the response payload.
    CATEGORIES = ("expression", "kinase")
    ASPECTS = ("all_info", "curatable", "language")

    def __init__(self):
        self.sentence_classifiers = self.load_sentence_classifiers(
            "/var/sentence_classification_models/")
        self.sent2vec_model = self.load_sent2vec_model(
            "/var/sentence_classification_models/biosentvec.bin")

    @staticmethod
    def load_sent2vec_model(sent2vec_model_path):
        """Load and return the sent2vec (BioSentVec) embedding model.

        A load failure is logged but not re-raised, matching the original
        best-effort behavior; the returned model object is then unloaded.
        """
        logger.info("Loading sentence embedding model...")
        biosentvec_model = sent2vec.Sent2vecModel()
        try:
            biosentvec_model.load_model(sent2vec_model_path)
        except Exception as e:
            logger.error(e)
        else:
            # Only claim success when load_model did not raise.
            logger.info("Sentence embedding model loaded")
        return biosentvec_model

    @classmethod
    def load_sentence_classifiers(cls, models_path):
        """Load all category/aspect classifiers from *models_path*.

        Returns a nested dict: {category: {aspect: fitted classifier}}.
        Raises whatever joblib.load raises if a model file is missing.
        """
        logger.info("Loading sentence classifiers...")
        classifiers = {
            category: {
                aspect: joblib.load(f"{models_path}/{aspect}_{category}.joblib")
                for aspect in cls.ASPECTS
            }
            for category in cls.CATEGORIES
        }
        logger.info("All sentence classifiers loaded")
        return classifiers

    def on_post(self, req, resp, req_type):
        """Classify posted sentences with every loaded classifier.

        Expects req.media == {"sentences": [str, ...]} and req_type ==
        "classify_sentences"; responds with
        {"classes": {category: {aspect: [label, ...]}}}.
        """
        if req_type != "classify_sentences" or "sentences" not in req.media:
            raise falcon.HTTPError(falcon.HTTP_BAD_REQUEST)
        # Embed once, then run every classifier on the shared embeddings.
        sentence_embeddings = self.sent2vec_model.embed_sentences(
            req.media["sentences"])
        classes = {
            category: {
                aspect: classifier.predict(sentence_embeddings).tolist()
                for aspect, classifier in aspects.items()
            }
            for category, aspects in self.sentence_classifiers.items()
        }
        # resp.body was deprecated in falcon 3 and removed in falcon 4;
        # resp.text is the supported spelling and serializes identically.
        resp.text = json.dumps({"classes": classes})
        resp.status = falcon.HTTP_200
|
||
|
||
def main():
    """Configure logging, wire up the falcon app, and serve forever."""
    logging.basicConfig(
        level='INFO',
        format='%(asctime)s - %(name)s - %(levelname)s:%(message)s')
    app = falcon.App(middleware=[HandleCORS()])
    sentence_classification_reader = SentenceClassificationReader()
    app.add_route('/api/sentence_classification/{req_type}',
                  sentence_classification_reader)

    # Honor the PORT the container EXPOSEs; fall back to the historical
    # hard-coded default of 8002 when the variable is unset.
    port = int(os.environ.get("PORT", 8002))
    httpd = simple_server.make_server('0.0.0.0', port, app)
    httpd.serve_forever()


# Guard the entry point so importing this module (e.g. from tests) does not
# load the models and start the server as the original unconditional call did.
if __name__ == '__main__':
    main()