-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgooglescrape.py
executable file
·46 lines (38 loc) · 1.42 KB
/
googlescrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
import urllib2
import csv
import datetime
from scraperutil import *
def gethistory(symbol):
    """Download the daily price history for *symbol* from Google Finance.

    The CSV endpoint is queried in consecutive windows of ``_atonce`` days,
    walking backwards in time from ``endtime()``, until a window contributes
    no date that has not already been seen.

    Args:
        symbol: Ticker symbol; substituted as-is into the ``q=`` parameter
            of the request URL.

    Returns:
        A list of row dicts (one per distinct date string) in download
        order.  In every row the date column is replaced by a ``'Date'`` key
        holding ``formatdate(date, '%d-%b-%y')`` and ``'Open'``/``'Close'``
        are floats rounded to two decimals; all other columns keep the
        string values read from the CSV.

    Raises:
        Any error raised by ``urllib2.urlopen`` or while reading the
        response propagates to the caller.  Rows that fail to parse are
        skipped rather than raised.
    """
    _atonce = 300  # days requested per HTTP call
    urltemplate = 'http://www.google.com/finance/historical?q={symbol}&output=csv&startdate={start}&enddate={end}'
    # The first CSV header arrives prefixed with a UTF-8 byte order mark.
    datekey = '\xef\xbb\xbfDate'
    urldatefmt = '%b+%d%%2C+%Y'

    end = endtime()
    start = end - datetime.timedelta(_atonce)
    fulllist = []
    dates = set()
    while True:
        url = urltemplate.format(symbol=symbol,
                                 start=start.strftime(urldatefmt),
                                 end=end.strftime(urldatefmt))
        # Out-of-range requests are answered with default data instead of an
        # error, so a window that adds no new date is the stop signal.
        alldupe = True
        response = urllib2.urlopen(url)
        try:
            for line in csv.DictReader(response):
                try:
                    # Fall back to a plain 'Date' header so a response
                    # without the BOM is not discarded row by row.
                    if datekey in line:
                        date = line.pop(datekey)
                    else:
                        date = line.pop('Date')
                    if date not in dates:
                        alldupe = False
                        dates.add(date)
                        line['Date'] = formatdate(date, '%d-%b-%y')
                        line['Open'] = round(float(line['Open']), 2)
                        line['Close'] = round(float(line['Close']), 2)
                        fulllist.append(line)
                except Exception:
                    # Best effort: drop rows with missing or unparseable
                    # fields, but let KeyboardInterrupt/SystemExit through.
                    continue
        finally:
            response.close()
        if alldupe:
            break
        # Slide the window back; the old start becomes the new end.
        start, end = start - datetime.timedelta(_atonce), start
    return fulllist
if __name__ == "__main__":
    import sys

    # The ticker symbol is the only positional argument; without it, exit
    # with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: %s SYMBOL" % sys.argv[0])
    printdata(gethistory(sys.argv[1]))