-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
97 lines (82 loc) · 2.8 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import streamlit as st
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import cohere
import os
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Initialize Cohere API
# NOTE(review): if COHERE_API_KEY is unset, os.getenv returns None and the
# client is built with a null key — API calls will fail later with an auth
# error; confirm whether a fail-fast check is wanted here.
cohere_api_key = os.getenv("COHERE_API_KEY")
cohere_client = cohere.Client(cohere_api_key)
# Extract text from uploaded PDF files
def extract_pdf_text(pdf_files):
    """Concatenate the extractable text of every page across all PDFs.

    Args:
        pdf_files: iterable of file-like objects accepted by PyPDF2.PdfReader
            (e.g. Streamlit UploadedFile instances).

    Returns:
        A single string containing the text of all pages, in order. Pages
        with no extractable text (e.g. scanned images) contribute nothing.
    """
    parts = []
    for pdf in pdf_files:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Bug fix: extract_text() may return None for image-only pages;
            # the original `text += page.extract_text()` raised TypeError.
            parts.append(page.extract_text() or "")
    # join() avoids quadratic repeated string concatenation over many pages.
    return "".join(parts)
# Split the extracted text into manageable chunks
def split_text_into_chunks(text):
    """Break *text* into overlapping chunks sized for downstream prompting.

    Args:
        text: the full extracted document text.

    Returns:
        A list of chunk strings produced by LangChain's recursive splitter
        (10k characters per chunk, 1k overlap between neighbours).
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=10000,
        chunk_overlap=1000,
    )
    return splitter.split_text(text)
# Handle user question input and provide an answer
def handle_user_query(user_question, context_chunks):
    """Answer *user_question* from the processed PDF context via Cohere.

    Args:
        user_question: the question typed by the user.
        context_chunks: list of text chunks previously extracted from the PDF.

    Side effect: writes the model's reply to the Streamlit page.
    """
    merged_context = " ".join(context_chunks)
    prompt = f"""
You are helpful AI Assistant designed to help analyse a PDF.
Use the provided context to assist in extracting relevant information
and performing analysis tasks. If the information is not present in the
context, simply state that the data is unavailable.
Follow these rules at all times:
- Do not access or refer to personal information outside the scope of the
provided context.
- Limit your assistance to the content of the context and queries
related directly to them.
- Stick strictly to factual and verifiable information provided in the
context. Avoid assumptions or interpretations beyond the data presented.
Context:
{merged_context}
Question:
{user_question}
Answer:
"""
    # Single non-streaming completion; only the first generation is shown.
    response = cohere_client.generate(
        model='command-xlarge-nightly',
        prompt=prompt,
        max_tokens=200,
    )
    answer = response.generations[0].text
    st.write("Reply:", answer)
def main():
    """Streamlit entry point: page chrome, Q&A input, and PDF-upload sidebar."""
    st.set_page_config(page_title="ChatPDF 💬📄", page_icon="💬")
    st.header("ChatPDF 💬📄")
    # Hide Streamlit's deploy button and hamburger menu via injected CSS.
    st.markdown(
        r"""
<style>
.stAppDeployButton {
visibility: hidden;
}
#MainMenu {
visibility: hidden;
}
</style>
""", unsafe_allow_html=True
    )
    user_question = st.text_input("Ask a question related to the uploaded PDF")
    if user_question:
        if 'context_chunks' in st.session_state:
            handle_user_query(user_question, st.session_state['context_chunks'])
        else:
            # Robustness: previously a question asked before processing a PDF
            # was silently ignored, giving no feedback at all.
            st.warning("Please upload and process a PDF before asking questions.")
    with st.sidebar:
        st.title("Menu:")
        pdf_files = st.file_uploader(
            "Upload PDF Files", accept_multiple_files=True)
        if st.button("Submit & Process"):
            if not pdf_files:
                # Robustness: previously clicking Submit with no files stored
                # an empty chunk list and misleadingly reported success.
                st.warning("Please upload at least one PDF file first.")
            else:
                with st.spinner("Processing..."):
                    raw_text = extract_pdf_text(pdf_files)
                    context_chunks = split_text_into_chunks(raw_text)
                    st.session_state['context_chunks'] = context_chunks
                st.success("Processing complete!")
if __name__ == "__main__":
main()