-
Notifications
You must be signed in to change notification settings - Fork 6
/
cache.py
executable file
·234 lines (201 loc) · 7.77 KB
/
cache.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
#!/usr/bin/env python3
"""FeedmeCache and related code.
"""
import os
import re
import shutil
import sys
import time

# Use XDG for the config and cache directories if it's available
try:
    import xdg.BaseDirectory
except:
    pass

import msglog
import utils
class FeedmeCache(object):
    """Record of which URLs have already been seen on each RSS feed.

    The FeedmeCache acts like a dictionary where the keys are site RSS URLs,
    and for each feed we have a list of URLs we've seen:
        { siteurl: [ url, url, url, ...] }

    It's best to create a new FeedmeCache using the static method
    FeedmeCache.newcache().

    Attributes:
        filename:  the cache file we're using.
        thedict:   { siteurl: [url, ...] }, the URLs seen for each feed.
        last_fed:  { siteurl: time }, when each feed was last updated,
                   in integer seconds since the epoch.
        last_time: the last modified time of the cache file, or None.
    """

    def __init__(self, cachefile):
        """Create an empty cache that will be stored in cachefile.

        Nothing is read from disk here; see newcache() and read_from_file().
        """
        self.filename = cachefile
        self.thedict = {}
        self.last_fed = {}
        self.last_time = None

    @staticmethod
    def get_cache_dir():
        """Return the directory in which feedme keeps its cache file.

        $XDG_CACHE_HOME is used if it is set; otherwise the xdg module's
        cache home, if that module got imported; otherwise ~/.cache.
        """
        if 'XDG_CACHE_HOME' in os.environ:
            cachehome = os.environ['XDG_CACHE_HOME']
        elif 'xdg.BaseDirectory' in sys.modules:
            cachehome = xdg.BaseDirectory.xdg_cache_home
        else:
            cachehome = utils.expanduser('~/.cache')

        return os.path.join(cachehome, 'feedme')

    @staticmethod
    def newcache():
        """Find the cache file and load it into a newly created
        FeedmeCache object, returning that object.

        If there's no writable cache file yet, make sure the cache
        directory exists and return an empty cache (last_time is None).
        Otherwise back up the existing file before reading it.
        """
        cachefile = os.path.join(FeedmeCache.get_cache_dir(), "feedme.dat")
        cache = FeedmeCache(cachefile)

        if not os.access(cachefile, os.W_OK):
            # exist_ok avoids failing if the directory appears between
            # a check and the creation.
            os.makedirs(os.path.dirname(cachefile), exist_ok=True)
            return cache

        # Make a backup of the cache file, in case something goes wrong.
        cache.back_up()
        cache.last_time = os.stat(cachefile).st_mtime
        cache.read_from_file()
        return cache

    #
    # New style cache files are human readable and look like this:
    # FeedMe v. 1.1
    # siteurl|time|url url url ...
    # One line per site.
    # urls are a list of URLs on the RSS feed the last time we looked.
    # Time is the last time we updated this site, seconds since epoch.
    # (Version 1 files have no time field: siteurl|url url url ...)
    # Fields are separated by | and urls by whitespace, so neither
    # the site url nor the urls may contain spaces or | characters.
    #

    def read_from_file(self):
        """Read the cache from self.filename into thedict and last_fed.

        Only new-style (text, "FeedMe v. ...") files are understood.
        Anything else, including an old pickle-based file, is reported
        and ignored, leaving the cache as it was.
        Malformed lines are reported on stderr and skipped.
        """
        try:
            with open(self.filename) as fp:
                contents = fp.read()
        except UnicodeDecodeError:
            # A binary (pickle) file may not even decode as text;
            # treat it like any other old-style file.
            contents = ""

        if not contents.startswith("FeedMe v."):
            # It's an old style, pickle-based file.
            print("Sorry, old-style pickle-based cache files are "
                  "no longer supported.\nStarting over without cache.")
            return

        # Must be a new-style file. Skip the version line.
        for line in contents.split('\n')[1:]:
            if not line.strip():
                continue

            # Format v. 1 has feedname|urllist
            # v. 1.1 has feedname|lastfed|urllist
            parts = line.split('|')
            if len(parts) == 2:
                key, urllist = parts
                lastfed = 0
            elif len(parts) == 3:
                key, lastfed, urllist = parts
                try:
                    lastfed = int(lastfed)
                except ValueError:
                    print("Problem splitting on |: '%s'" % line,
                          file=sys.stderr)
                    continue
            else:
                print("Confused by", len(parts), "parts in cache",
                      file=sys.stderr)
                continue

            key = key.strip()
            self.thedict[key] = urllist.strip().split()
            self.last_fed[key] = lastfed

    def back_up(self):
        """Back up the cache file to a file named for when
        the cache file, self.filename, was last modified.

        The backup is called base-YY-MM-DD-Day.ext; if that exists,
        -1 through -9 are tried as suffixes, and if all of those exist
        too, the -9 backup is overwritten.
        Failure to back up is reported but is not fatal.
        """
        try:
            mtime = os.stat(self.filename).st_mtime
            timeappend = time.strftime("%y-%m-%d-%a", time.localtime(mtime))
            base, ext = os.path.splitext(self.filename)
            backupfilebase = "%s-%s%s" % (base, timeappend, ext)

            for num in range(10):
                if num:
                    backupfile = "%s-%d" % (backupfilebase, num)
                else:
                    backupfile = backupfilebase
                if not os.path.exists(backupfile):
                    break

            print("Backing up cache file to", backupfile)
            shutil.copy2(self.filename, backupfile)

        except Exception as e:
            # Backing up is best-effort: warn, but carry on.
            msglog.warn("WARNING: Couldn't back up cache file!")
            print(str(e), file=sys.stderr)
            utils.ptraceback()

    def save_to_file(self, save_days=5):
        """Serialize the cache to a version-1.1 new style cache file,
        then remove backups of the cache file older than save_days days.

        The existing file should already have been backed up by newcache().
        """
        # Write the new cache file.
        with open(self.filename, "w") as fp:
            print("FeedMe v. 1.1", file=fp)
            for k in self.thedict:
                print("%s|%d|%s" % (FeedmeCache.id_encode(k),
                                    self.last_fed.get(k, 0),
                                    ' '.join(map(FeedmeCache.id_encode,
                                                 self.thedict[k]))),
                      file=fp)

        self._remove_old_backups(save_days)

    def _is_backup_name(self, name):
        """Is name (no directory part) a backup of our cache file?

        Recognizes what back_up() creates, base-YY-MM-DD-Day.ext with
        an optional -N suffix, as well as the older feedme.nnnnnn names.
        """
        base, ext = os.path.splitext(os.path.basename(self.filename))
        # The day name comes from strftime("%a"), so don't assume
        # anything about which characters it contains.
        pattern = r"%s-\d{2}-\d{2}-\d{2}-.+?%s(?:-\d+)?" % (re.escape(base),
                                                            re.escape(ext))
        if re.fullmatch(pattern, name):
            return True

        # Older naming scheme: does it have numbers after the "feedme."?
        if name.startswith("feedme."):
            try:
                int(name[7:14])
            except ValueError:
                return False
            return True

        return False

    def _remove_old_backups(self, save_days):
        """Delete backups in the cache directory older than save_days days.

        Problems examining or removing a backup are reported on stderr
        and otherwise ignored: the cache itself has already been saved.
        """
        cachedir = os.path.dirname(self.filename) or "."
        livename = os.path.basename(self.filename)
        now = time.time()

        for f in os.listdir(cachedir):
            # Never consider the cache file itself.
            if f == livename or not self._is_backup_name(f):
                continue

            # listdir gives bare names; they're relative to cachedir,
            # not to the current directory.
            path = os.path.join(cachedir, f)
            try:
                if not os.path.isfile(path):
                    continue
                # How old is it? st_mtime is secs.
                age_days = (now - os.stat(path).st_mtime) / 60 / 60 / 24
                if age_days > save_days:
                    print("Removing old cache", f, file=sys.stderr)
                    os.unlink(path)
            except OSError as e:
                print("Couldn't remove old cache %s: %s" % (f, e),
                      file=sys.stderr)

    def add_items(self, sitekey, items):
        """Add any not-yet-seen items (URLs) to the list for sitekey,
        and note that the site was fed just now.

        Does nothing, not even updating the time, if items is empty.
        The caller's items list is copied from, never kept or modified.
        """
        if not items:
            return

        self.last_fed[sitekey] = int(time.time())

        seen = self.thedict.setdefault(sitekey, [])
        # A set makes the membership test cheap; the list keeps the order.
        known = set(seen)
        for item in items:
            if item not in known:
                seen.append(item)
                known.add(item)

    def last_fed_site(self, sitekey):
        """Return the time sitekey was last fed, in seconds since the epoch,
        or None (with a message on stderr) if there's no record of it.
        """
        try:
            return self.last_fed[sitekey]
        except (KeyError, TypeError) as e:
            print("Couldn't get last fed time for", sitekey, ":", e,
                  file=sys.stderr)
            return None

    def __repr__(self):
        return repr(self.thedict)

    @staticmethod
    def id_encode(s):
        """Return s with spaces replaced by +, since spaces separate
        the URLs in the cache file.
        """
        return s.replace(' ', '+')

    # Methods to act like a dictionary:

    def __getitem__(self, key):
        return self.thedict[key]

    def __setitem__(self, key, val):
        """Set the URL list for key, and note that it was fed just now."""
        self.last_fed[key] = int(time.time())
        self.thedict[key] = val

    def __delitem__(self, name):
        del self.thedict[name]

    def __len__(self):
        return len(self.thedict)

    def __iter__(self):
        return iter(self.thedict)

    def __contains__(self, item):
        return item in self.thedict

    def keys(self):
        """Return a list of the site URLs in the cache."""
        return list(self.thedict)