-
Notifications
You must be signed in to change notification settings - Fork 6
/
app_faiss.py
144 lines (122 loc) · 6.12 KB
/
app_faiss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import os
import re
import uuid
from langchain_openai import OpenAI
import streamlit as st
from langchain_community.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
MODEL = "gpt-3.5-turbo"
DEBUG = True # True to overwrite files that already exist
# Remove HTML from sources
def remove_html_tags(text):
clean = re.compile('<.*?>')
return re.sub(clean, '', text)
def remove_markdown(text):
# Remove headers (e.g., # Header)
text = re.sub(r'#.*$', '', text, flags=re.MULTILINE)
# Remove bold/italic (e.g., **bold**, *italic*)
text = re.sub(r'\*.*\*', '', text)
# Remove links (e.g., [text](url))
text = re.sub(r'\[.*\]\(.*\)', '', text)
# Remove lists (e.g., - item)
text = re.sub(r'- .*$', '', text, flags=re.MULTILINE)
return text
def clean_text(text):
text = remove_html_tags(text)
text = remove_markdown(text)
return text
if "session_id" not in st.session_state:
st.session_state.session_id = str(uuid.uuid4())
if "messages" not in st.session_state:
st.session_state.messages = []
st.set_page_config(page_title="Chat with Simon Wardley's Book")
st.title("Chat with Simon Wardley's Book")
st.sidebar.markdown("# Query Simon's book using AI")
st.sidebar.markdown("Developed by Mark Craddock](https://twitter.com/mcraddock)", unsafe_allow_html=True)
st.sidebar.markdown("Current Version: 1.1.0")
st.sidebar.markdown("Using gpt-3.5-turbo-1106")
st.sidebar.markdown(st.session_state.session_id)
st.sidebar.markdown("Wardley Mapping is provided courtesy of Simon Wardley and licensed Creative Commons Attribution Share-Alike.")
st.sidebar.divider()
# Check if the user has provided an API key, otherwise default to the secret
user_openai_api_key = st.sidebar.text_input("Enter your OpenAI API Key:", placeholder="sk-...", type="password")
# Get datastore
DATA_STORE_DIR = "data_store"
if user_openai_api_key:
os.environ["OPENAI_API_KEY"] = user_openai_api_key
if "vector_store" not in st.session_state:
if os.path.exists(DATA_STORE_DIR):
st.session_state.vector_store = FAISS.load_local(
DATA_STORE_DIR,
OpenAIEmbeddings(),
allow_dangerous_deserialization='True',
)
else:
st.write(f"Missing files. Upload index.faiss and index.pkl files to {DATA_STORE_DIR} directory first")
custom_system_template="""
You are SimonGPT with the style of a strategy researcher with well over twenty years research in strategy and cloud computing.
You use complicated examples from Wardley Mapping in your answers.
Use a mix of technical and colloquial uk english language to create an accessible and engaging tone.
Your language should be for an 12 year old to understand.
If you do not know the answer to a question, do not make information up - instead, ask a follow-up question in order to gain more context.
Your primary objective is to help the user formulate excellent answers by utilizing the context about the book and
relevant details from your knowledge, along with insights from previous conversations.
----------------
Reference Context and Knowledge from Similar Existing Services: {context}
Previous Conversations: {chat_history}"""
custom_user_template = "Question:'''{question}'''"
prompt_messages = [
SystemMessagePromptTemplate.from_template(custom_system_template),
HumanMessagePromptTemplate.from_template(custom_user_template)
]
prompt = ChatPromptTemplate.from_messages(prompt_messages)
if "memory" not in st.session_state:
st.session_state.memory = ConversationBufferWindowMemory(memory_key="chat_history", return_messages=True, output_key='answer')
if "llm" not in st.session_state:
st.session_state.llm = ChatOpenAI(
model_name=MODEL,
temperature=0,
max_tokens=400,
)
if "chain" not in st.session_state:
st.session_state.chain = ConversationalRetrievalChain.from_llm(
llm=st.session_state.llm,
retriever=st.session_state.vector_store.as_retriever(
search_kwargs={
"k": 3,
#"score_threshold": .95,
}
),
chain_type="stuff",
rephrase_question = True,
return_source_documents=True,
memory=st.session_state.memory,
combine_docs_chain_kwargs={'prompt': prompt}
)
for message in st.session_state.messages:
if message["role"] in ["user", "assistant"]:
with st.chat_message(message["role"]):
st.markdown(message["content"])
if query := st.chat_input("What question do you have for the book?"):
st.session_state.messages.append({"role": "user", "content": query})
with st.chat_message("user"):
st.markdown(query)
with st.spinner():
with st.chat_message("assistant"):
response = st.session_state.chain(query)
st.markdown(response['answer'])
st.divider()
source_documents = response['source_documents']
for index, document in enumerate(source_documents):
if 'source' in document.metadata:
source_details = document.metadata['source']
cleaned_content = clean_text(document.page_content)
st.warning(f"Source {index + 1}: Page {document.metadata['page']}\n")
st.write(f"{cleaned_content}\n")
st.session_state.messages.append({"role": "assistant", "content": response['answer']})
else:
st.warning("Please enter your OpenAI API key", icon="⚠️")