Merge pull request #726 from sneha-4-22/main
Quest_Submission[OS InsightX]
doberst authored May 17, 2024
2 parents 2237588 + 1c27d41 commit 6dea558
Showing 5 changed files with 133 additions and 0 deletions.
17 changes: 17 additions & 0 deletions Quest/OS_InsightX/README.md
@@ -0,0 +1,17 @@
## APP PREVIEW



https://github.com/sneha-4-22/RAG-OS-qa/assets/112711068/04735392-d9e1-46ea-951b-17ab7a662f58


## Models Used

The embedding model is [Industry-BERT for Insurance](https://huggingface.co/llmware/industry-bert-insurance-v0.1), published by llmware on Hugging Face; it embeds the operating-systems textbook for retrieval in a RAG (Retrieval-Augmented Generation) pipeline. Answers are then generated with [llmware/bling-sheared-llama-1.3b-0.1](https://huggingface.co/llmware/bling-sheared-llama-1.3b-0.1).




![image](https://github.com/sneha-4-22/RAG-OS-qa/assets/112711068/749d4468-ca67-4c54-9777-8f3673104151)
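
For reference, here is a minimal sketch of how the two models fit together, mirroring `app.py`. It assumes the Chroma index has already been built into `operatingsystem/embed` (see `vector.py`) and that `HUGGINGFACEHUB_API_TOKEN` is set:

```python
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

# Industry-BERT embeds the question; Chroma returns the two closest textbook chunks.
embeddings = SentenceTransformerEmbeddings(model_name="llmware/industry-bert-insurance-v0.1")
store = Chroma(persist_directory="operatingsystem/embed", embedding_function=embeddings)
retriever = store.as_retriever(search_kwargs={"k": 2})

# BLING generates the answer; "stuff" concatenates the retrieved chunks into one prompt.
llm = HuggingFaceHub(
    repo_id="llmware/bling-sheared-llama-1.3b-0.1",
    model_kwargs={"temperature": 0.3, "max_length": 500},
)
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

print(qa({"query": "What is a semaphore?"})["result"])
```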


88 changes: 88 additions & 0 deletions Quest/OS_InsightX/app.py
@@ -0,0 +1,88 @@
import os
from dotenv import load_dotenv
from langchain_community.llms import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import SentenceTransformerEmbeddings
import streamlit as st

st.set_page_config(
    page_title="📚 Talk to Galvin's OS Textbook",
    page_icon="👻",
)
load_dotenv()

# Forward the Hugging Face token loaded from .env (assigning None would raise a TypeError).
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_token:
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = hf_token

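# Prompt that grounds the model's answer in the retrieved textbook context.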
prompt_template = """To provide the best response, consider the following context and question carefully:
Context: {context}
Question: {question}
Provide an accurate and concise response based on the given context and question.
"""

prompt = PromptTemplate(template=prompt_template, input_variables=["question", "context"])

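# Embedding model; must be the same one used when the index was built in vector.py.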
embeddings = SentenceTransformerEmbeddings(model_name="llmware/industry-bert-insurance-v0.1")

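# Load the pre-built Chroma index and expose it as a retriever returning the top 2 chunks.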
load_vector_store = Chroma(persist_directory="operatingsystem/embed", embedding_function=embeddings)

lvs = load_vector_store.as_retriever(search_kwargs={"k":2})

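# Small instruction-tuned LLM used for answer generation.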
repo = "llmware/bling-sheared-llama-1.3b-0.1"

hfllm = HuggingFaceHub(
    repo_id=repo, model_kwargs={"temperature": 0.3, "max_length": 500}
)

kwargs_type = {"prompt": prompt}

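# Build the RetrievalQA chain: the "stuff" chain type concatenates the retrieved
# chunks into a single prompt for the LLM.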
def quesans():
    qa = RetrievalQA.from_chain_type(
        llm=hfllm,
        chain_type="stuff",
        retriever=lvs,
        return_source_documents=True,
        chain_type_kwargs=kwargs_type,
        verbose=True,
    )
    return qa

qa = quesans()

def main():

    st.title("📚 Talk to Galvin's OS Textbook")

    st.markdown(
        """
        <style>
        body {
            background-color: #FFC0CB;
        }
        </style>
        """,
        unsafe_allow_html=True,
    )

    text_query = st.text_area("Type your question here...", height=100)

    generate_response_btn = st.button("Get Answer")

    st.subheader("🎉 Answer 🎉")

    if generate_response_btn and text_query:
        with st.spinner("Generating response..."):
            text_response = qa(text_query)
            if text_response:
                # Show the generated answer, not the raw chain output dict.
                st.write(text_response["result"])
                st.success("Response generated!")
            else:
                st.error("Oops! I have no idea what you mean.")
        st.balloons()

if __name__ == "__main__":
    main()

Binary file not shown.
10 changes: 10 additions & 0 deletions Quest/OS_InsightX/requirements.txt
@@ -0,0 +1,10 @@
torch
sentence_transformers
streamlit
langchain
chromadb
pypdf
huggingface_hub
python-dotenv
langchain-community
18 changes: 18 additions & 0 deletions Quest/OS_InsightX/vector.py
@@ -0,0 +1,18 @@
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.vectorstores import Chroma


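# Use the same embedding model as app.py so queries and documents share one embedding space.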
embeddings = SentenceTransformerEmbeddings(model_name="llmware/industry-bert-insurance-v0.1")

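# Load every PDF under data/ and split it into ~700-character chunks with 70-character overlap.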
loader = DirectoryLoader('data/', glob="**/*.pdf", show_progress=True, loader_cls=PyPDFLoader)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=70)
texts = text_splitter.split_documents(documents)

# Persist the index to the directory that app.py loads from (the paths must match).
vector_store = Chroma.from_documents(
    texts,
    embeddings,
    collection_metadata={"hnsw:space": "cosine"},
    persist_directory="operatingsystem/embed",
)

print("Vector DB Successfully Created!")
