-
Notifications
You must be signed in to change notification settings - Fork 191
/
Copy pathsmart_websearch.py
109 lines (90 loc) · 4.15 KB
/
smart_websearch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Copyright 2023, Gary Sims
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
# GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
# DAMAGE.
import requests
import urllib.parse
import json
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from bs4 import BeautifulSoup
def make_request_with_headers(url, headers):
"""
Make an HTTP GET request to a specified URL with given headers.
"""
try:
response = requests.get(url, headers=headers)
response.raise_for_status() # Raise an exception for HTTP errors
return response.text
except requests.RequestException as e:
return f"An error occurred: {e}"
def urlencode(string):
return urllib.parse.quote(string, safe='/+')
def replace_spaces_with_plus(string):
return string.replace(" ", "+")
def fetch_web_content(url):
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.content, 'html.parser')
return soup.get_text()
def fetch_content_make_summary(url):
content = fetch_web_content(url)
prompt = "Summarize this text into a 300-word extractive summary, ignore all HTML, CSS, and Javascript. The summary text should be easy to read, and engaging:\n" + content
summary = chat_model.predict(prompt)
return summary
origterms = input("Please enter your search term: ")
terms = replace_spaces_with_plus(origterms)
terms = urlencode(terms)
print("Searching...")
url = "https://api.search.brave.com/res/v1/web/search?result_filter=web&q=" + terms
headers = {
'Accept': 'application/json',
"X-Subscription-Token": 'TOKEN-HERE'
}
response = make_request_with_headers(url, headers)
# Response in is JSON see https://api.search.brave.com/app/documentation/web-search/responses
data = json.loads(response)
web = data['web']
results = web['results']
# Extract first 3 URLs
url1 = results[0]['url']
url2 = results[1]['url']
url3 = results[2]['url']
print("Init ChatGPT...")
# Different models
# gpt-4
# gpt-4-1106-preview
# gpt-3.5-turbo (gpt-3.5-turbo-0613)
chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key="KEY-HERE")
print("Fetch first web page and create summary...")
sum1 = fetch_content_make_summary(url1)
print("Fetch second web page and create summary...")
sum2 = fetch_content_make_summary(url2)
print("Fetch third web page and create summary...")
sum3 = fetch_content_make_summary(url3)
print("Ask ChatGPT about this subject...")
prompt = "Create a detailed summary about " + origterms
sum0 = chat_model.predict(prompt)
print("Ask ChatGPT to create a meta summary...")
prompt = "Rewrite the following text as a blog post, ignoring all duplicate information. The blog post text should be easy to read, and engaging:\n\n" + sum1 + "\n\n\n" + sum2 + "\n\n\n" + sum3 + "\n\n\n" + sum0
msummary = chat_model.predict(prompt)
print("\n\n", msummary)
print("\n\nSources:\n", url1, "\n", url2, "\n", url3, "\nAnd ChatGPT!")