-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathstreamupdate.py
executable file
·331 lines (285 loc) · 11.9 KB
/
streamupdate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Stream loading support. This is adapted from an old django app, and uses
various custom libraries to simplify API access:
pip install https://github.com/jmoiron/python-github/tarball/master
pip install https://bitbucket.org/jmoiron/python-bitbucket/get/tip.tar.gz
pip install twitter-text-py argot lxml
"""
import sys
import os
import datetime
import time
import urllib2
import json
from pprint import pprint, pformat
from hashlib import md5
import pymongo
def md5sum(x):
    """Return the hexadecimal MD5 digest of ``x``."""
    return md5(x).hexdigest()


# maximum time to put something in cache
# (2591999 is one less than 30 * 24 * 60 * 60; presumably seconds)
cachemax = 2591999
# Stream builds the paths of its timestamp files under this directory.
tmp = "/dev/shm/"
moddir = os.path.dirname(__file__)
# config.json is expected next to this module. Read it inside a ``with`` block
# so the file handle is closed as soon as parsing is done.
with open(os.path.join(moddir, "config.json")) as _config_file:
    config = json.load(_config_file)
host = config.get("DbHost", "localhost")
port = config.get("DbPort", 27017)
con = pymongo.Connection(host, port)
# Every stream entry is read from and written to this database's ``stream``
# collection.
db = con.monet
def touch(path, times=None):
    """Create ``path`` if it does not exist and set its timestamps.

    ``times`` is handed straight to ``os.utime``: ``None`` stamps the file
    with the current time, otherwise it is an ``(atime, mtime)`` pair.
    """
    # ``open`` instead of the ``file`` builtin: same behaviour on Python 2,
    # and ``file`` does not exist on Python 3. Append mode creates the file
    # without truncating existing content.
    with open(path, 'a'):
        os.utime(path, times)
class Stream(object):
    """One configured update source, throttled through a timestamp file.

    The modification time of the file at ``self.path`` records when the
    stream was last refreshed; ``update`` skips the refresh until
    ``interval`` seconds have passed since then.
    """

    def __init__(self, config):
        """Build a stream from one configuration dict.

        ``config`` must contain ``type``; ``interval`` is optional and
        defaults to 900. Every remaining key is kept in ``self.arguments``
        and later passed as keyword arguments to the type-specific updater.
        """
        # The id is a digest of the complete entry, so it is taken before
        # anything is removed.
        self.id = md5sum(repr(config))[:12]
        # Pop from a private copy: popping from ``config`` itself would strip
        # the caller's dict and make a second Stream(config) fail on "type".
        arguments = dict(config)
        self.type = arguments.pop("type")
        self.interval = arguments.pop("interval", 900)
        self.path = os.path.join(tmp, "%s-%s" % (self.type, self.id))
        self.arguments = arguments

    def last_updated(self):
        """Return the mtime of the timestamp file, or 0 if it cannot be stat'ed."""
        try:
            return os.stat(self.path).st_mtime
        except OSError:
            # Most commonly the file does not exist yet (never updated).
            return 0

    def update(self, force=False):
        """Refresh the stream if it is due, or unconditionally when ``force`` is set.

        Only the "github" type has an updater here. The timestamp file is
        touched after every due run, whatever the type.
        """
        now = time.time()
        if force or (self.last_updated() + self.interval < now):
            if self.type == "github":
                GithubStream().update(**self.arguments)
            touch(self.path)

    def __repr__(self):
        return "<Stream: %s:%s>" % (self.type, self.id)
def update(streams, force=False):
    """Call ``update(force)`` on each item of ``streams``, in order."""
    for source in streams:
        source.update(force)
def rerender(streams):
    """Call ``rerender()`` on each item of ``streams``, in order.

    NOTE(review): the Stream class in this file does not define a
    ``rerender`` method, so this looks like it would raise AttributeError
    for Stream instances -- confirm before relying on ``--rerender``.
    """
    for source in streams:
        source.rerender()
def parse_args():
    """Parse the command line and return optparse's ``(options, args)`` pair.

    All three switches are boolean flags: ``-r/--rerender``, ``-f/--force``
    and ``--github-fix-1``.
    """
    import optparse
    cli = optparse.OptionParser(usage="./prog [options]")
    for short_flag, long_flag in (("-r", "--rerender"), ("-f", "--force")):
        cli.add_option(short_flag, long_flag, action="store_true")
    # fix an old github bug
    cli.add_option("", "--github-fix-1", action="store_true", help="fix an issue with github urls")
    parsed = cli.parse_args()
    return parsed
def main():
    """Script entry point: run the action selected on the command line.

    ``--github-fix-1`` runs the one-off repair and nothing else. Otherwise a
    Stream is built for every entry of ``config["Streams"]`` and the streams
    are either re-rendered (``--rerender``) or updated, honouring ``--force``.
    """
    opts, _ = parse_args()
    if opts.github_fix_1:
        github_fix_1()
        return
    streams = [Stream(entry) for entry in config["Streams"]]
    if opts.rerender:
        rerender(streams)
    else:
        update(streams, opts.force)
class GithubStream(object):
    """Copies a GitHub user's repositories and commits into ``db.stream``.

    ``update`` is the entry point. It creates ``self.handle`` (the API
    client), which ``get_repos`` and ``get_commits`` both rely on.
    """

    def get_repos(self, username, all=False):
        """Return ``username``'s repositories, annotated for the stream.

        Each repository dict gains three keys: ``project`` ("owner/name"),
        ``event`` ("fork" or "create") and ``id`` ("event-owner-name").
        ``all`` is passed through to the client's ``repositories`` call.
        """
        user = self.handle.user(username)
        repos = user.repositories(all=all)
        for repo in repos:
            repo[u'project'] = '%s/%s' % (repo['owner'], repo['name'])
            repo[u'event'] = 'fork' if repo['fork'] else 'create'
            repo[u'id'] = '%s-%s-%s' % (repo['event'], repo['owner'], repo['name'])
        return repos

    def get_commits(self, username, repos, all=False):
        """Return the commits in ``repos`` whose committer is ``username``'s account.

        Every fetched commit is tagged with its ``repository`` dict and
        ``event`` set to "commit". Commits with no committer, or with a
        committer whose login differs from the user's, are dropped.
        """
        user = self.handle.user(username)
        all_commits = []
        for repo in repos:
            commits = user.repository(repo['name']).commits(all=all)
            for commit in commits:
                commit['repository'] = repo
                commit['event'] = 'commit'
            all_commits += [
                c for c in commits
                if c['committer'] and c['committer']['login'] == user.username
            ]
        return all_commits

    def update(self, all=False, **args):
        """Store every repository and commit not yet present in ``db.stream``.

        ``args`` must contain ``username``. Entries are identified by
        ``sourceid`` (the annotated repo id, or the commit sha); anything
        already stored is skipped, so repeated runs only add new items.
        """
        # Imported here so the module can be loaded without these libraries.
        from github import Github, to_datetime
        from argot import utils

        def to_epoch(stamp):
            # API date value -> whole seconds, via time.mktime.
            return int(time.mktime(to_datetime(stamp).timetuple()))

        username = args["username"]
        self.handle = Github(username=username)
        repos = self.get_repos(username, all=all)
        commits = self.get_commits(username, repos, all=all)

        for repo in repos:
            sourceid = str(repo["id"])
            if db.stream.find({"type": "github", "sourceid": sourceid}).count():
                continue
            timestamp = to_epoch(repo["created_at"])
            # Spell the verb out: appending "ed" to the event name turned
            # "create" into "createed".
            action = "forked" if repo["event"] == "fork" else "created"
            entry = {
                "sourceid": sourceid,
                "type": "github",
                "timestamp": timestamp,
                "title": "%s %s @ %s" % (repo["name"], action, timestamp),
                "url": repo["url"],
                "data": json.dumps({"event": repo}),
            }
            db.stream.save(entry)

        for commit in commits:
            sourceid = str(commit["sha"])
            # NOTE(review): unlike the repository check above, this lookup
            # does not filter on type -- confirm whether that is intended.
            if db.stream.find({"sourceid": sourceid}).count():
                continue
            timestamp = to_epoch(commit["commit"]["author"]["date"])
            # This might block for some time:
            details = self.handle.repository(username, commit['repository']['name']).commit(commit["sha"])
            commit.update(details)
            for mod in commit.get('modified', []):
                mod['htmldiff'] = utils.pygmentize(mod['diff'], 'diff', cssclass="diff")
            if "message" not in commit:
                commit["message"] = commit["commit"]["message"]
            # API urls are reduced to their path so the public site prefix
            # can be added below.
            if commit['url'].startswith("https://api.github.com/repos"):
                commit["url"] = commit["url"].replace("https://api.github.com/repos", "")
            entry = {
                "type": "github",
                "sourceid": sourceid,
                "timestamp": timestamp,
                "title": "committed %s to %s" % (commit["sha"], commit['repository']['name']),
                "url": ("https://github.com%s" % commit["url"]).replace("/commits/", "/commit/"),
                "data": json.dumps({'event': commit}),
            }
            db.stream.save(entry)
def _rewrite_github_urls(url_filter, transform):
    """Apply ``transform`` to the url of every github entry matching ``url_filter``.

    Each matched entry is saved back and counted, whether or not its url
    actually changed. Returns the number of entries processed.
    """
    count = 0
    for entry in db.stream.find({"type": "github", "url": url_filter}):
        entry["url"] = transform(entry["url"])
        db.stream.save(entry)
        count += 1
    return count


def github_fix_1():
    """One-off repair of github entries already stored in ``db.stream``.

    Runs, in order: four url clean-ups, a back-fill of missing commit
    messages, and a recomputation of every entry's timestamp. Prints how
    many saves were made. The url passes are order-dependent: the first
    leaves "https://://" behind for the second to collapse, and the third
    can produce "/commits/" urls that the fourth rewrites.
    """
    # fix urls
    import github

    fixed = 0
    # site prefix glued in front of an already absolute url
    fixed += _rewrite_github_urls(
        {"$regex": "https://github.comhttp.*", "$options": "i"},
        lambda url: url.replace("github.comhttps", ""))
    # doubled scheme separator
    fixed += _rewrite_github_urls(
        {"$regex": "https://://.*"},
        lambda url: url.replace("://://", "://"))
    # API urls -> site urls
    fixed += _rewrite_github_urls(
        {"$regex": "https://api.github.*", "$options": "i"},
        lambda url: "http://github.com%s" % (url.replace("https://api.github.com/repos", "")))
    # plural "commits" path segment -> singular
    fixed += _rewrite_github_urls(
        {"$regex": ".*/commits/.*"},
        lambda url: url.replace("/commits/", "/commit/"))

    # fix messages that are commits but do not have a message in the commit data
    for entry in db.stream.find({"type": "github"}):
        commit = json.loads(entry["data"])["event"]
        if "message" not in commit and "commit" in commit:
            commit["message"] = commit["commit"]["message"]
            entry["data"] = json.dumps({"event": commit})
            db.stream.save(entry)
            fixed += 1

    # fix timestamps on creates and forks, and on commits
    for entry in db.stream.find({"type": "github"}):
        event = json.loads(entry["data"])["event"]
        if event["event"] != "commit":
            stamp = event["created_at"]
        elif "committed_date" in event:
            stamp = event["committed_date"]
        else:
            # fix the absence of "committed_date" key in older events
            stamp = event["commit"]["author"]["date"]
            event["committed_date"] = stamp
            entry["data"] = json.dumps({"event": event})
        # Always store whole seconds: time.mktime returns a float, and the
        # commit branch used to save it unconverted.
        entry["timestamp"] = int(time.mktime(github.to_datetime(stamp).timetuple()))
        db.stream.save(entry)
        fixed += 1

    # Parenthesised single-argument form prints the same text on Python 2
    # and also parses on Python 3.
    if fixed == 1:
        print("Fixed 1 entry")
    else:
        print("Fixed %d entries." % fixed)
if __name__ == "__main__":
    # Ctrl-C counts as a clean exit: the status stays 0 unless main() returns
    # something else.
    ret = 0
    try:
        ret = main()
    except KeyboardInterrupt:
        pass
    sys.exit(ret)
'''
def get_detailed_updates(user, limit=50):
"""Gets the most recent `limit` updates for a bitbucket user object.
This object is created by BitBucket().user(username)."""
events = user.events(limit=50)['events']
events = [e for e in events if e['event'] != 'commit']
# filter out issue updates and comments since they have no data
events = [e for e in events if not e['event'].startswith('issue_')]
for e in events:
e['created_on'] = to_datetime(e['created_on'])
repos = user.repositories()
checkins = []
for repo in repos:
repo['url'] = "http://bitbucket.org/%s/%s/" % (user.username, repo['slug'])
# this can be empty for repos that have not had a changeset
changesets = user.repository(repo['slug']).changesets()
if 'changesets' in changesets:
changesets = changesets['changesets']
for cs in changesets:
cs['repository'] = repo
checkins += changesets
for c in checkins:
c['created_on'] = to_datetime(c['timestamp'])
c['event'] = 'commit'
c['description'] = c['message']
ret = events + checkins
ret.sort(key=lambda item: item['created_on'], reverse=True)
return ret[:limit] if limit > 0 else ret
class BitbucketPlugin(object):
settings = stream_settings.get('bitbucket', {})
tag = 'bitbucket'
def get_api_handle(self):
username = self.settings.get('username', None)
password = self.settings.get('password', None)
if username and password:
return BitBucket(username, password)
return BitBucket()
def reprocess(self):
"""Batch re-processes all available StreamEntrys with this plugins tag."""
for entry in StreamEntry.objects.filter(source_tag=self.tag):
update = entry.data['update']
entry.title = self.make_title(update)
entry.permalink = self.make_url(update)
entry.save()
def make_title(self, update):
if update['event'] == 'commit':
return "commit #%d to <a href=\"%s\" title=\"%s\">%s</a>" % (update['revision'],
update['repository']['url'],
update['repository']['description'].replace('"', '&quot;'),
update['repository']['name'])
return "%s event" % (update['event'])
def make_url(self, update):
if update['event'] == 'commit':
return "%schangeset/%s" % (update['repository']['url'], update['node'])
return "FIXME"
def force(self, all=False):
"""Updates the events on the user set in settings."""
bb = self.get_api_handle()
user = bb.user(self.arguments['username'])
limit = 0 if all else 20
updates = get_detailed_updates(user, limit=limit)
for update in updates:
if update['node'] is None:
update['node'] = "%s-%s" % (update['event'], update['repository']['slug'])
checksum = md5sum(repr(update))
if self.in_cache(update['node'], checksum):
continue
try:
entry = StreamEntry.objects.get(source_tag=self.tag, source_id=update['node'])
except StreamEntry.DoesNotExist:
entry = StreamEntry(source_tag=self.tag,
source_id=str(update['node']),
plugin=self.stream_plugin)
if entry.md5sum == checksum:
self.set(update['node'], checksum)
continue
entry.title = self.make_title(update)
entry.timestamp = update['created_on']
entry.permalink = self.make_url(update)
entry.data = {
'update' : update
}
entry.md5sum = checksum
entry.save()
self.stream_plugin.last_run = datetime.datetime.now()
self.stream_plugin.save()
'''