-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
181 lines (146 loc) · 6.94 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from collections import defaultdict
import logging
import os
from typing import List
from dotenv import load_dotenv
from flask import Flask, json, make_response, request
from flask_cors import CORS
from nltk.stem.porter import PorterStemmer
import openai
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
logger = logging.getLogger(__name__)
# Set up the Flask application
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 16 * 1000 * 1000
CORS(app)
# Configure the OpenAI API key
load_dotenv()
openai.api_key = os.getenv("REACT_APP_OPENAI_API")
# Set up recommender system
df = None
cv = CountVectorizer(max_features=5000, stop_words='english')
ps = PorterStemmer()
default_response = '[{"skill": "Skill Map", "dependsOn": null},{"skill": "Self Management", ' + \
'"dependsOn": "Skill Map"},{"skill": "Managing Stress", "dependsOn": "Skill Map"},' + \
'{"skill": "Self Awareness", "dependsOn": "Self Management"}]'
@app.route("/api", methods=['POST'])
def handle():
text = json.loads(request.data)['text']
# Generate and process response
gen_response = query_to_response(text)
print(gen_response)
try:
gen_json = json.loads(gen_response)
except:
logger.warning('gen_response is invalid JSON')
gen_json = json.loads(default_response)
results = defaultdict(float)
for skill in gen_json:
if skill.get('skill', 'Skill Map') == 'Skill Map':
continue
for course, similarity in recommend(skill.get('skill'), 10):
results[course] += similarity
courses = []
for course in sorted(results, reverse=True, key=lambda k: results[k])[:5]:
courses.append({
'Course Name': df.loc[course]['Course Name'],
'Difficulty Level': df.loc[course]['Difficulty Level'],
'Course URL': df.loc[course]['Course URL']
})
# Send recommendations back to frontend
if not results or not courses:
logger.error('No courses recommended!')
response = make_response({
'roadmap': gen_response,
'courses': json.dumps(courses)
})
response.headers['Access-Control-Allow-Headers'] = '*'
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = '*'
return response
def query_to_response(query) -> str:
messages = [
{
"role": "system",
"content": "The user is a manager of a company who will prompt you with the evaluation of an employee. " +
"Based on the evaluation, generate a skill roadmap for the employee. " +
"Your response must obey the following rules:" +
"- Collapse your response in one line" +
"- List each skill as a JSON string with the 'skill' key storing the title of the skill " +
"and the 'dependsOn' key storing the parent skill title" +
"- Concatenate all skills together into a list and label it as 'skillsAndDependencies'" +
"- The first listed skill should have a skill title of 'Skill Map' and dependsOn null" +
"- No blank skill titles and dependsOn fields" +
"- No duplicate skill titles or additional fields other than 'skill' and 'dependsOn'" +
"- Each skill should be a logical subset of its parent skill, except for when the parent skill is 'Skill Map'" +
"- Each dependsOn field should be the title of an existing skill in the roadmap" +
"- Minimum of 10 skills and maximum of 15 skills" +
"- Only include the list in your response, follow the sample response strictly" +
"- Sample response:" +
'const skillsAndDependencies = [{"skill": "Skill Map", "dependsOn": null},{"skill": "Self Management", ' +
'"dependsOn": "Skill Map"},{"skill": "Managing Stress", "dependsOn": "Skill Map"},' +
'{"skill": "Self Awareness", "dependsOn": "Self Management"}];'
},
{
"role": "user",
"content": query
}
]
# Query ChatGPT for response
chat = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=messages
)
reply = str(chat.choices[0].message.content)
messages.append({"role": "assistant", "content": reply})
# Process response: filter out the JSON list from the rest of the response
try:
response = reply[reply.index('['):reply.index('];') + 1].replace('\n', '').replace(' ', '')
except ValueError:
response = default_response
finally:
return response
# The stemming function for NLP
def stem(text: str) -> str:
global ps
return " ".join(ps.stem(word) for word in text.split())
def init_recommender() -> None:
global df
df = pd.read_csv('Coursera.csv')
# Removing spaces between the words
df['Formatted Name'] = df.loc[:, 'Course Name']
df['Formatted Name'] = df['Formatted Name'].str.replace(' ', ',')
df['Formatted Name'] = df['Formatted Name'].str.replace(',,', ',')
df['Formatted Name'] = df['Formatted Name'].str.replace(':', '')
df['Formatted Description'] = df.loc[:, 'Course Description']
df['Formatted Description'] = df['Formatted Description'].str.replace(' ', ',')
df['Formatted Description'] = df['Formatted Description'].str.replace(',,', ',')
df['Formatted Description'] = df['Formatted Description'].str.replace('_', '')
df['Formatted Description'] = df['Formatted Description'].str.replace(':', '')
df['Formatted Description'] = df['Formatted Description'].str.replace('(', '')
df['Formatted Description'] = df['Formatted Description'].str.replace(')', '')
# Combining column data
df['tags'] = df['Formatted Name'] + ',' + df['Difficulty Level'] + ',' + df['Formatted Description']
df['tags'] = df['tags'].apply(lambda s: s.lower())
df['tags'] = df['tags'].str.replace(',', ' ')
df['tags'] = df['tags'].apply(stem)
init_recommender()
def recommend(text: str, n: int) -> List[str]:
global df, cv
if df is None:
logger.error('Recommender system not initialised!')
return
# Pre-process course
tag = stem(text.replace(' ', ',').replace(',,', ',').replace(':', '').replace('_', '').
replace('(', '').replace(')', '').replace(',', ' ').lower())
# Perform NLP processing
vectors = cv.fit_transform(df['tags'].tolist() + [tag]).toarray()
similarity = cosine_similarity(vectors)
#print(len(similarity))
#print(similarity.shape)
#print(similarity)
# Find similar courses
#print(sorted(enumerate(similarity[-1]), reverse=True, key=lambda t: t[1]))
return sorted(enumerate(similarity[-1]), reverse=True, key=lambda t: t[1])[1:min(n + 1, len(similarity))]