This repository has been archived by the owner on Dec 15, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_data.py
78 lines (62 loc) · 2.52 KB
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import csv
import time
import yahoo_finance as yf
# All dates should use format YYYY-MM-DD
# Gets historical price data of the S&P 500 by parsing csv file
def get_hist_sp(start_date, end_date):
    """Return S&P 500 price entries from ``sp500.csv`` between two dates.

    The file ``sp500.csv`` is read from the current working directory. Its
    rows are expected to be in descending date order with the date
    (``YYYY-MM-DD``) in the first column; reading stops at the first row
    dated before ``start_date``.

    Args:
        start_date: Lower bound as a ``YYYY-MM-DD`` string (exclusive).
        end_date: Upper bound as a ``YYYY-MM-DD`` string (exclusive).

    Returns:
        A list of ``{"date": <str>, "actual": <float>}`` dictionaries in file
        order. ``"actual"`` is the value in column index 4 (presumably the
        closing price -- confirm against the CSV layout).

    Raises:
        ValueError: If ``start_date`` or ``end_date`` is not ``YYYY-MM-DD``,
            or if column 4 of an in-range row is not a number.
        IndexError: If an in-range row has fewer than five columns.
    """
    import sys

    date_format = "%Y-%m-%d"
    range_start = time.strptime(start_date, date_format)
    range_end = time.strptime(end_date, date_format)

    # The csv module wants a binary file on Python 2 but a text file on
    # Python 3, so pick the mode that matches the running interpreter.
    mode = "rb" if sys.version_info[0] < 3 else "r"

    data = []
    with open("sp500.csv", mode) as f:
        for row in csv.reader(f):
            try:
                date = time.strptime(row[0], date_format)
            except (IndexError, ValueError):
                # Header row, blank line, or any row whose first column is
                # not a date: skip it.
                continue
            # csv is in descending order, so everything from here on is
            # older than the requested range.
            if date < range_start:
                break
            # Both bounds are exclusive.
            if range_start < date < range_end:
                data.append({"date": row[0], "actual": float(row[4])})
    return data
# Gets historical price data of a specific share
def get_hist_share(symbol, start_date, end_date):
    """Return the historical price data of a single share.

    Creates a ``yf.Share`` for ``symbol`` and hands back, unchanged, the
    result of its ``get_historical(start_date, end_date)`` call.

    Args:
        symbol: Ticker symbol passed to ``yf.Share``.
        start_date: Start of the range, ``YYYY-MM-DD``.
        end_date: End of the range, ``YYYY-MM-DD``.
    """
    return yf.Share(symbol).get_historical(start_date, end_date)
# Combines an arbitrary number of lists containing dictionaries representing price data by date into one
# NOTE: All lists should have the same number of entries and the dictionaries should be in the form as in the examples given below
# Examples:
# [{"date": "1-1-1990", "actual": 100}] + [{"date": "1-1-1990", "mean": 200}] --> [{"date": "1-1-1990", "actual": 100, "mean": 200}]
# [{"date": "1-1-1990", "actual": 100}, {"date": "1-2-1990", "actual": 200}] + [{"date": "1-1-1990", "mean": 300}, {"date": "1-2-1990", "mean": 400}]
# --> [{"date": "1-1-1990", "actual": 100, "mean": 300}, {"date": "1-2-1990", "actual": 200, "mean": 400}]
def combine_data(*args):
    """Merge lists of per-date dictionaries into one list keyed by date.

    The first non-empty list provides the dates of the result. Every later
    list contributes its non-``"date"`` keys to the result entries that share
    the same ``"date"`` value. Entries whose date does not occur in the first
    non-empty list are ignored.

    Args:
        *args: Any number of lists of dictionaries, each dictionary holding a
            ``"date"`` key plus one or more value keys, e.g.
            ``{"date": "1-1-1990", "actual": 100}``.

    Returns:
        A new list of new dictionaries; the input lists and their
        dictionaries are not modified. Returns ``[]`` when no arguments are
        given or every list is empty.

    Raises:
        KeyError: If a dictionary has no ``"date"`` key.
    """
    # Seed the result with copies of the first non-empty list so the
    # caller's dictionaries are never mutated.
    combined = []
    remaining = ()
    for position, series in enumerate(args):
        if series:
            combined = [dict(entry) for entry in series]
            remaining = args[position + 1:]
            break

    # Index the result by date so each merge is a dictionary lookup instead
    # of a scan over the whole result. A list per date keeps every entry
    # updated when the seed list repeats a date.
    by_date = {}
    for entry in combined:
        by_date.setdefault(entry["date"], []).append(entry)

    for series in remaining:
        for entry in series:
            for target in by_date.get(entry["date"], ()):
                # Copy every value key by name rather than by position:
                # dictionary ordering must not decide which key is merged.
                for key, value in entry.items():
                    if key != "date":
                        target[key] = value
    return combined
if __name__ == "__main__":
    # Manual smoke test for combine_data. The two commented calls exercise
    # the other helpers (get_hist_sp reads sp500.csv from the working
    # directory).
    # print(get_hist_share("AAPL", "2014-04-25", "2014-04-29"))
    # print(get_hist_sp("2016-05-10", "2016-05-20"))
    lst1 = []
    lst2 = [{"date": "1-1-1990", "actual": 100}, {"date": "1-2-1990", "actual": 200}]
    lst3 = [{"date": "1-1-1990", "mean": 300}, {"date": "1-2-1990", "mean": 400}]
    lst4 = [{"date": "1-1-1990", "dev": 300}, {"date": "1-2-1990", "dev": 400}]
    # A parenthesised single argument prints identically as the Python 2
    # statement and as the Python 3 function.
    print(combine_data(lst1, lst2, lst3, lst4))
    print(combine_data(lst4, lst3, lst2, lst1))