-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
152 lines (129 loc) · 5.09 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Import required libraries
import openai
import asyncio
import re
import whisper
import boto3
import pydub
from pydub import playback
import speech_recognition as sr
from EdgeGPT import Chatbot, ConversationStyle
import json
# Set OpenAI API key
# config.json must provide an 'openai_api_key' entry. The file is decoded as
# UTF-8 explicitly so parsing does not depend on the platform's default
# encoding.
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)
openai.api_key = config['openai_api_key']
# Initialize speech recognizer
# Shared module-level instance; main() uses it to calibrate for ambient noise
# and to capture audio from the microphone.
recognizer = sr.Recognizer()
# Define wake words
# get_wake_word() looks for these as case-insensitive substrings of the
# transcribed phrase, checking the Bing word first. In main(), the Bing word
# routes the prompt to the EdgeGPT Chatbot; any other detected wake word
# routes it to the OpenAI chat completion call.
BING_WAKE_WORD = 'bing'
GPT_WAKE_WORD = 'gpt'
# Function to get wake word from the recognized phrase
def get_wake_word(phrase):
    """Return the wake word found in *phrase*, or ``None`` if there is none.

    The phrase is lowercased and each wake word is tested as a plain
    substring. The Bing wake word takes priority when both are present.
    """
    lowered = phrase.lower()
    # Tuple order encodes priority: Bing is checked before GPT.
    for candidate in (BING_WAKE_WORD, GPT_WAKE_WORD):
        if candidate in lowered:
            return candidate
    return None
# Function to synthesize speech using Amazon Polly
def synthesize_speech(text, output_filename):
    """Synthesize *text* with Amazon Polly and save it as an MP3 file.

    Args:
        text: The text to speak.
        output_filename: Path of the MP3 file to write (overwritten if it
            already exists).

    Any error raised by boto3 or by opening/writing the file propagates to
    the caller.
    """
    from contextlib import closing

    polly = boto3.client('polly', region_name='us-west-2')
    response = polly.synthesize_speech(
        Text=text,
        OutputFormat='mp3',
        VoiceId='Salli',
        Engine='neural',
    )
    # 'AudioStream' is a streaming response body; close it once it has been
    # read so the underlying connection is released instead of leaking.
    with closing(response['AudioStream']) as stream:
        with open(output_filename, 'wb') as f:
            f.write(stream.read())
# Function to play audio file
def play_audio(file):
    """Decode *file* as MP3 with pydub and play it through pydub's playback."""
    playback.play(pydub.AudioSegment.from_file(file, format='mp3'))
# Private helpers used by main()
def _transcribe(audio, model, wav_path):
    """Write *audio* to *wav_path* as WAV and return Whisper's transcript text."""
    with open(wav_path, 'wb') as f:
        f.write(audio.get_wav_data())
    return model.transcribe(wav_path)['text']


async def _ask_bing(prompt, style):
    """Ask Bing Chat once and return the bot's reply, or None if it sent none.

    A fresh Chatbot is created for the request and always closed afterwards,
    even when the request raises. If the response holds several bot messages
    the last one wins. Footnote markers such as ``[^1^]`` are stripped.
    """
    bot = Chatbot(cookiePath='cookies.json')
    try:
        response = await bot.ask(prompt=prompt, conversation_style=style)
    finally:
        await bot.close()

    reply = None
    for message in response['item']['messages']:
        if message['author'] == 'bot':
            reply = message['text']
    if reply is None:
        return None
    # Remove footnotes
    return re.sub(r'\[\^\d+\^\]', '', reply)


def _ask_gpt(prompt):
    """Send *prompt* to the OpenAI chat completion API and return the reply text."""
    response = openai.ChatCompletion.create(
        model='gpt-3.5-turbo',
        messages=[
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': prompt},
        ],
        temperature=0.5,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
        n=1,
        stop=['\nUser:'],
    )
    return response['choices'][0]['message']['content']


# Main async function
async def main():
    """Run the voice assistant loop until the process is interrupted.

    Each cycle:
      1. Listen on the microphone until a transcribed phrase contains a
         wake word (see get_wake_word).
      2. Ask for a prompt aloud, record it and transcribe it.
      3. Send the prompt to Bing Chat (Bing wake word) or to the OpenAI chat
         completion API (any other wake word).
      4. Print the answer, synthesize it with Polly and play it.

    Transcription errors are reported on stdout and the cycle restarts;
    errors from the chat back ends, Polly or playback propagate.
    """
    # Load each Whisper model once instead of on every utterance; loading is
    # loop-invariant work.
    wake_model = whisper.load_model('tiny')
    prompt_model = whisper.load_model('base')

    while True:
        # Capture audio with microphone
        with sr.Microphone() as source:
            recognizer.adjust_for_ambient_noise(source)
            print(f"Waiting for wake words '{BING_WAKE_WORD}' or '{GPT_WAKE_WORD}'...")
            while True:
                # Listen for user input and transcribe
                audio = recognizer.listen(source)
                try:
                    phrase = _transcribe(audio, wake_model, 'audio.wav')
                except Exception as e:
                    print('Error transcribing audio: {0}'.format(e))
                    continue
                print(f'You said: {phrase}')
                # Check if the wake word is present
                wake_word = get_wake_word(phrase)
                if wake_word is not None:
                    break
                print('Not a wake word. Try again.')

            # Ask for prompt
            print('Speak a prompt...')
            synthesize_speech('What can I help you with?', 'response.mp3')
            play_audio('response.mp3')
            audio = recognizer.listen(source)
            # Transcribe user prompt
            try:
                user_input = _transcribe(audio, prompt_model, 'audio_prompt.wav')
            except Exception as e:
                print('Error transcribing audio: {0}'.format(e))
                continue
            print(f'You said: {user_input}')

        if wake_word == BING_WAKE_WORD:
            # Handle Bing wake word. The creative answer is the one that is
            # spoken; the precise style is only queried as a fallback when
            # the creative response contains no bot message, so Bing is not
            # asked twice for an answer that would be thrown away.
            bot_response = await _ask_bing(user_input, ConversationStyle.creative)
            if bot_response is None:
                bot_response = await _ask_bing(user_input, ConversationStyle.precise)
        else:
            # Handle GPT wake word. No Chatbot exists on this path, so there
            # is nothing to close afterwards.
            bot_response = _ask_gpt(user_input)

        if not bot_response:
            # Nothing to speak; skip synthesis rather than pass an empty or
            # missing text to Polly.
            print('No response received from the bot.')
            continue

        # Output bot response and synthesize speech
        print('Bot\'s response:', bot_response)
        synthesize_speech(bot_response, 'response.mp3')
        play_audio('response.mp3')
# Run the main function
if __name__ == '__main__':
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # main() loops forever, so Ctrl+C is the normal way to stop the
        # assistant; exit quietly instead of dumping a traceback.
        print('\nExiting.')