-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmultiple_files.py
85 lines (74 loc) · 3 KB
/
multiple_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import streamlit as st
import google.generativeai as genai
from PIL import Image
import os
from dotenv import load_dotenv
import fitz # PyMuPDF
import io
# Populate the process environment via python-dotenv before reading the key.
load_dotenv()

# Hand the Google API key (None when the variable is unset) to the client.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

# Module-wide Gemini Pro Vision model handle shared by every request.
# NOTE(review): confirm this model id is still served by the API.
model = genai.GenerativeModel('gemini-pro-vision')
def get_gemini_response(input_prompt, documents, user_input_prompt):
    """Ask the module-level Gemini model a question about the given documents.

    The request content is assembled in order: the instruction prompt, every
    document part, then the user's question.

    Args:
        input_prompt: Instruction text placed first in the request.
        documents: Iterable of content parts (as built by
            ``input_document_bytes``). Any iterable is accepted, not only
            a list.
        user_input_prompt: The user's question. When empty or ``None`` it is
            omitted instead of being sent as a blank part.

    Returns:
        The ``text`` attribute of the model response.

    Raises:
        Whatever ``model.generate_content`` or reading ``response.text``
        raises; nothing is caught here.
    """
    # Unpacking (rather than list concatenation) also works for tuples and
    # generators, which `[input_prompt] + documents` would reject.
    parts = [input_prompt, *documents]
    if user_input_prompt:
        parts.append(user_input_prompt)
    response = model.generate_content(parts)
    return response.text
def input_document_bytes(uploaded_file):
    """Convert one uploaded file into a list of image parts for the model.

    A PDF is rendered page by page and every page becomes one PNG part.
    Any other upload is passed through untouched as a single part that
    carries the upload's own MIME type.

    Args:
        uploaded_file: Upload object exposing ``getvalue()`` (raw bytes) and
            ``type`` (MIME string), or ``None`` when nothing was uploaded.

    Returns:
        A list of ``{"mime_type": ..., "data": ...}`` dicts, one per PDF page
        or exactly one for an image upload.

    Raises:
        FileNotFoundError: If ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        raise FileNotFoundError("No File Uploaded")

    bytes_data = uploaded_file.getvalue()

    # Image files (jpeg, jpg, png) are forwarded as-is.
    if uploaded_file.type != "application/pdf":
        return [{"mime_type": uploaded_file.type, "data": bytes_data}]

    # Convert PDF to images. The context manager closes the document even
    # when rendering a page fails, so the handle is never leaked.
    images = []
    with fitz.open(stream=bytes_data, filetype="pdf") as pdf_document:
        for page in pdf_document:
            pix = page.get_pixmap()
            # Assumes the default pixmap is RGB without alpha, so the raw
            # samples map directly onto an "RGB" PIL image - TODO confirm.
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            with io.BytesIO() as output:
                img.save(output, format="PNG")
                images.append({
                    "mime_type": "image/png",
                    "data": output.getvalue(),
                })
    return images
# Initialize the Streamlit App
st.set_page_config(page_title="Document Content Query App")

# Instruction text that is sent ahead of the documents on every request.
input_prompt = """
You are an expert in understanding various document formats. Please try to answer the question using the information
from the uploaded documents.
"""

# Input widgets: the free-text question and the multi-file uploader.
user_input_prompt = st.text_input("User Input Prompt", key="input")
upload_files = st.file_uploader(
    "Upload Documents",
    type=["pdf", "jpg", "jpeg", "png"],
    accept_multiple_files=True,
)

# Preview each upload: PDFs are listed by name, images are rendered inline.
for uploaded in upload_files or []:
    mime = uploaded.type
    if mime == "application/pdf":
        st.write(f"Uploaded PDF: {uploaded.name}")
    elif mime in ("image/jpeg", "image/jpg", "image/png"):
        st.image(
            Image.open(uploaded),
            caption=f"Uploaded Image: {uploaded.name}",
            use_column_width=True,
        )
submit = st.button("Find the Answer from the Documents")

# Handle a click on the submit button. Each precondition gets its own
# message so the user is never left with a button that silently does nothing.
if submit and not upload_files:
    st.warning("Please upload at least one document before submitting.")
elif submit and not (user_input_prompt or "").strip():
    st.warning("Please enter a question about the uploaded documents.")
elif submit:
    # Convert every upload into model-ready parts. A file that fails is
    # reported and skipped so the remaining files can still be used.
    all_documents_data = []
    for file in upload_files:
        try:
            all_documents_data.extend(input_document_bytes(file))
        except Exception as e:
            st.error(f"Error processing file {file.name}: {e}")

    if not all_documents_data:
        # Every upload failed, so there is nothing to send to the model.
        st.error("None of the uploaded files could be processed.")
    else:
        # Only the model call is guarded, so a rendering problem is not
        # misreported as a generation error.
        try:
            response = get_gemini_response(input_prompt, all_documents_data, user_input_prompt)
        except Exception as e:
            st.error(f"Error generating response: {e}")
        else:
            st.subheader("Response")
            st.write(response)