-
Notifications
You must be signed in to change notification settings - Fork 0
/
eudaimon_screenshots
executable file
·149 lines (131 loc) · 5.61 KB
/
eudaimon_screenshots
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
################################################################################
# #
# eudaimon_screenshots #
# #
################################################################################
# #
# LICENCE INFORMATION #
# #
# This program uploads a screenshot every few seconds with a standard prompt #
# for OpenAI analysis and returns a text response for each screenshot. #
# #
# copyright (C) 2024 William Breaden Madden #
# #
# This software is released under the terms of the GNU General Public License #
# version 3 (GPLv3). #
# #
# This program is free software: you can redistribute it and/or modify it #
# under the terms of the GNU General Public License as published by the Free #
# Software Foundation, either version 3 of the License, or (at your option) #
# any later version. #
# #
# This program is distributed in the hope that it will be useful, but WITHOUT #
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for #
# more details. #
# #
# For a copy of the GNU General Public License, see #
# <http://www.gnu.org/licenses>. #
# #
################################################################################
usage:
program [options]
options:
-h, --help display help message
--prompt=TEXT prompt [default: Without describing the GUI or including any passcodes or secure information, what's the content in focus in this screenshot?]
--interval=seconds time between screenshots in seconds [default: 45]
'''
from datetime import datetime
import docopt
import base64
import os
import requests
import sys
import time
from openai import OpenAI
import pyautogui
__version__ = '2024-06-10T2255Z'
def take_screenshot():
    """
    Capture the screen and save it as a timestamped PNG file.

    The filename is the current UTC time formatted as
    ``%Y-%m-%dT%H%M%S%fZ`` followed by ``.png``; it is a relative name, so
    the file is written to the current working directory.

    Returns:
        The filename of the saved screenshot, or None if capturing or saving
        the screenshot failed (the error is printed).
    """
    # Function-scope import: the module itself only imports the `datetime`
    # class, and this keeps the block self-contained.
    from datetime import timezone

    try:
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC
        # datetime yields the same formatted timestamp.
        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%S%fZ")
        filename = timestamp + '.png'
        screenshot = pyautogui.screenshot()
        screenshot.save(filename)
        return filename
    except Exception as e:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate, and report the
        # reason instead of failing silently.
        print(f"error taking screenshot: {str(e)}")
        return None
def encode_image(image_path):
    """
    Read a file and return its contents encoded as base64 text.

    Args:
        image_path: path of the file to read in binary mode.

    Returns:
        The base64 encoding of the file's bytes, decoded to a UTF-8 string,
        or None if the file does not exist or any other error occurs (an
        error message is printed in either case).
    """
    try:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes)
        return encoded.decode('utf-8')
    except FileNotFoundError:
        print(f"error: file {image_path} not found")
    except Exception as error:
        print(f"error encoding image: {str(error)}")
    # Reached only after one of the handlers above has reported a failure.
    return None
def analyse_image(image_path="image.png", text_prompt="What's in this image?", timeout=60):
    """
    Send an image and a text prompt to the OpenAI chat completions endpoint.

    The image is read from disk, base64-encoded and embedded in the request
    as a data URL. The API key is read from the OPENAI_API_KEY environment
    variable.

    Args:
        image_path: path of the image file to upload.
        text_prompt: text sent alongside the image.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        The text content of the first choice in the response. None if the
        API key is not set, the image cannot be encoded, or the request
        fails (an error message is printed). If the response JSON lacks the
        expected keys, the parsed JSON is returned as-is so the caller can
        inspect it.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        print('error: OPENAI_API_KEY environment variable not set')
        return None
    base64_image = encode_image(image_path)
    if not base64_image:
        return None
    # Label the data URL with the file's real type (e.g. image/png for the
    # PNG screenshots) instead of always claiming JPEG; fall back to the
    # previous image/jpeg label when the extension is not a known image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": "gpt-4o",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": text_prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 300
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the caller's capture loop.
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()  # Raise error for HTTP codes 4xx/5xx
        content = response.json()['choices'][0]['message']['content']
        return content
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
    except requests.exceptions.RequestException as req_err:
        print(f"request error occurred: {req_err}")
    except KeyError:
        print("unexpected response format")
        return response.json()
    except Exception as e:
        print(f"error: {str(e)}")
    return None
def main(options=None):
    """
    Repeatedly take a screenshot, send it for analysis and print the result.

    Waits one interval before the first screenshot, then loops until
    interrupted with Ctrl-C, sleeping for the interval after each iteration.

    Args:
        options: docopt-style mapping containing '--interval' and
            '--prompt'. When None, the command line is parsed against the
            module docstring.
    """
    # Parse the command line at call time; parsing in the default argument
    # would run docopt as soon as the module is imported.
    if options is None:
        options = docopt.docopt(__doc__)
    interval = int(options['--interval'])
    prompt = options['--prompt']
    # Fail fast with a clear message rather than starting a loop in which
    # every analysis would be rejected for lack of a key.
    if not os.getenv('OPENAI_API_KEY'):
        sys.exit('error: OPENAI_API_KEY environment variable not set')
    print(f"starting in {interval} seconds")
    try:
        time.sleep(interval)
        while True:
            image_path = take_screenshot()
            if image_path is None:
                # take_screenshot returns None on failure; skip this round
                # instead of crashing on '\n' + None.
                print('\nerror: screenshot could not be taken')
            else:
                print('\n' + image_path)
                result = analyse_image(image_path, prompt)
                print(result)
            time.sleep(interval)
    except KeyboardInterrupt:
        # Ctrl-C is the only way to end the loop; exit without a traceback.
        print('\nstopping')
# Start the capture loop only when run as a script, not when imported.
if "__main__" == __name__:
    main()