-
Notifications
You must be signed in to change notification settings - Fork 1
/
snapshotbot.py
208 lines (168 loc) · 7.6 KB
/
snapshotbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/usr/bin/env python
"""
Reddit bot framework
Framework to create Reddit bots that process comments.
"""
import praw
import logging
import pyimgur
import pymongo
import re
from datetime import datetime
from urlparse import urlparse
from redditprocessor import Command
class SnapshotCommand(Command):
"""Snapshot Command.
Command that:
* is triggered by the keyword '/u/snapshot_bot' in the comment body
* parses the comment for urls using URL_REGEX expression
* calls the snapito API to snapshot the URL
* uploads the snapshot to imgur
"""
# URL for Snap API
SNAPITO_URL_TEMPLATE \
= 'http://api.snapito.com/web/{API_KEY}/full/{URL}?type=png'
# URL Regex from http://daringfireball.net
URL_REGEX = re.compile(
ur'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)'
ur'(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+'
ur'|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d'
ur'\u2018\u2019]))')
# String used to trigger the processing of this command.
TRIGGER_WORD = '/u/snapshot_bot'
def __init__(self, snapito_key, imgur_key, mongo_url,
snap_url_template=SNAPITO_URL_TEMPLATE):
super(SnapshotCommand, self).__init__('SnapshotBot')
self.logger = logging.getLogger('SnapshotBot')
self.imgur = pyimgur.Imgur(imgur_key)
self.snapito_key = snapito_key
self.db = self._open_db(mongo_url)
self.snap_url_template = snap_url_template
def process(self, comment):
'''Process the latest comments for a given subreddit'''
if (comment.author is not None
and comment.author.name != u'snapshot_bot'
and SnapshotCommand.TRIGGER_WORD in comment.body):
self.logger.debug(
'comment: %s - %s - %s',
comment.id, comment.author, comment.body[:50])
self._process_comment(comment)
def _process_comment(self, comment):
'''Process a comment that contains the trigger'''
self.logger.debug(
'Processing comment: %s - %s - %s - %s',
comment.link_id, comment.id, comment.author.name, comment.body[:50])
if (self._is_already_processed(comment)):
return
self.logger.info('not processed')
self._log_to_db(comment)
urls = self._extract_urls(comment.body)
self._log_to_db(comment, urls)
if len(urls) == 0:
self.logger.info('No URLs to process, skipping.')
self._log_to_db(comment, urls, reply_completed=True)
return
imgur_album = self._create_imgur_album(comment)
self._log_to_db(comment, urls, imgur_album)
imgur_urls = self._screenshot_urls(urls, comment, imgur_album)
self._log_to_db(comment, urls, imgur_album, imgur_urls)
if (len(imgur_urls) > 0):
self._log_to_db(comment, urls, imgur_album, imgur_urls)
reply_text = self._create_reply(urls, imgur_urls, imgur_album)
self._log_to_db(comment, urls, imgur_album, imgur_urls,
reply_text, False)
self._send_reply(comment, reply_text)
self._log_to_db(comment, urls, imgur_album, imgur_urls,
reply_text, True)
def _extract_urls(self, body):
matches = re.findall(SnapshotCommand.URL_REGEX, body)
urls = [x[0] for x in matches]
return urls
def _create_reply(self, urls, imgur_urls, album):
REPLY_HEADER = 'The following URLs have been snapshotted:\n\n'
REPLY_LINK = '* {url} - [[snapshot]({snapshot})]\n\n'
REPLY_ALBUM_LINK = '* [Snapshot Album]({album_link})\n\n'
REPLY_FOOTER = (
"`For more information go to:` [r/snapshot_bot]("
"http://reddit.com/r/snapshot_bot).\n\n")
self.logger.debug("FOO: %s\n %s", urls, imgur_urls)
links_txt = ''.join(
[REPLY_LINK.format(url=url, snapshot=imgur_url)
for url, imgur_url in zip(urls, imgur_urls)])
album_txt = REPLY_ALBUM_LINK.format(album_link=album.link)
reply = ''.join([REPLY_HEADER, links_txt, album_txt, REPLY_FOOTER])
self.logger.debug('Reply text: %s', reply)
return reply
def _send_reply(self, comment, text):
try:
comment.reply(text)
except praw.errors.RateLimitExceeded as e:
self.logger.warning('Rate limit exceeded', e)
def _open_db(self, mongo_url):
# Get a connection
conn = pymongo.Connection(mongo_url)
# Get the db
return conn[urlparse(mongo_url).path[1:]]
def _get_db_key(self, comment):
return {
'submission_id': comment.link_id,
'comment_id': comment.id
}
def _is_already_processed(self, comment):
key = self._get_db_key(comment)
curr = self.db.snapshot_log.find_one(key)
return (curr is not None and curr['reply_completed'] is True)
def _log_to_db(self, comment, snapshot_urls=None, imgur_album=None,
imgur_urls=None, reply_text=None, reply_completed=None):
key = self._get_db_key(comment)
imgur_album_id = imgur_album.id if imgur_album is not None else None
self.db.snapshot_log.update(
key,
{
'submission_id': comment.link_id,
'comment_id': comment.id,
'author': comment.author.name,
'created_datetime': datetime.fromtimestamp(comment.created_utc),
'snapshot_urls': snapshot_urls,
'imgur_album': imgur_album_id,
'imgur_urls': imgur_urls,
'reply_text': reply_text,
'reply_completed': reply_completed
},
upsert=True)
self.logger.debug('Wrote log: %s', key)
def _create_imgur_album(self, comment):
ALBUM_DESCRIPTION_TEMPLATE = (
'Snapshot for {author} at {permalink}.\n'
'Snapshot by: /u/snapshot_bot')
album_title = "{0}'s snapshot".format(comment.author.name)
album_description = ALBUM_DESCRIPTION_TEMPLATE.format(
author=comment.author.name, permalink=comment.permalink)
album = self.imgur.create_album(title=album_title,
description=album_description)
self.logger.debug('created album %s', album)
return album
def _screenshot_urls(self, urls, comment, album):
IMAGE_DESCRIPTION_TEMPLATE = (
'Snapshot for {author} at {permalink}.\n'
'URL: {url}\n\n'
'Snapshot by: /u/snapshot_bot')
self.logger.info('capturing url: %s', urls)
imgur_urls = []
for url in urls:
snapito_url = self.snap_url_template.format(
API_KEY=self.snapito_key, URL=url)
self.logger.info('snapito: %s', snapito_url)
title_url = (url[:30] + '..') if len(url) > 30 else url
title = 'Snapshot {0} [{1}]'.format(title_url, datetime.now())
description = IMAGE_DESCRIPTION_TEMPLATE.format(
author=comment.author.name, permalink=comment.permalink,
url=url)
image = self.imgur.upload_image(url=snapito_url,
title=title,
description=description,
album=album.deletehash)
self.logger.info('successfully captured %s to %s',
image.id, image.link)
imgur_urls.append(image.link)
return imgur_urls