-
Notifications
You must be signed in to change notification settings - Fork 14
/
compile_data.py
159 lines (108 loc) · 3.68 KB
/
compile_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
## James Quintero
## https://github.com/JamesQuintero
## Created: 2017
## Modified: 2017
import os
import csv
def read_from_csv(path):
    """Return the rows of the CSV file at ``path``.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one entry per CSV row, in file order; each entry is a
        list of the row's cell strings. An empty list is returned when
        ``path`` is not an existing regular file.
    """
    if not os.path.isfile(path):
        return []

    # newline='' leaves line-ending handling to the csv module.
    with open(path, newline='') as csv_file:
        # csv.reader already yields a fresh list of strings for every row,
        # so no per-cell copying is needed.
        return list(csv.reader(csv_file))
def save_to_csv(path, data):
    """Write ``data`` to ``path`` as CSV, overwriting any existing file.

    Args:
        path: Destination file path.
        data: Iterable of rows, each row an iterable of cell values.
    """
    # newline='' leaves line-ending handling to the csv writer.
    with open(path, 'w', newline='') as out_file:
        csv.writer(out_file).writerows(data)
# Input and output files, given relative to the current working directory.
oecd_path = "./MEI_standardized_CCI.csv"
nber_path = "./USREC.csv"
output_path = "./compiled_data_OECD_standardized_CCI.csv"

# Series label selected from the OECD file. Alternative labels that can be
# swapped in instead:
#   "OECD Standardised BCI, Amplitude adjusted (Long term average=100), sa"
#   "Amplitude adjusted (CLI)"
#   "Normalised (CLI)"
#   "Trend restored (CLI)"
#   "12-month rate of change of the trend restored CLI"
#   "Ratio to trend (GDP)"
#   "Normalised (GDP)"
#   "Trend (GDP)"
relevant_data = "OECD Standardised CCI, Amplitude adjusted (Long term average=100), sa"

# Country whose series supplies the dates of the output rows.
relevant_country = "United States"

# Starting month, written once in the form matched against the OECD rows and
# once in the form matched against the NBER rows (standard date format).
start_date = "1970-01"
start_date_NBER = "1970-01-01"
# Column positions read from each OECD CSV row.
_SERIES_COL = 1    # series label, compared against relevant_data
_COUNTRY_COL = 3   # country name
_DATE_COL = 6      # month, compared against start_date
_VALUE_COL = 14    # indicator value

# Value written for a country that has no row for a given month.
_MISSING_VALUE = 100

contents = read_from_csv(oecd_path)

# Group the rows of the selected series by country. country_names records the
# order in which countries first appear; it later fixes the column order.
country_names = []
country_data = {}
for oecd_row in contents:
    # Skip blank or truncated rows that cannot hold every column used below,
    # as well as rows belonging to other series.
    if len(oecd_row) <= _VALUE_COL or oecd_row[_SERIES_COL] != relevant_data:
        continue
    country = oecd_row[_COUNTRY_COL]
    if country not in country_data:
        country_data[country] = []
        country_names.append(country)
    country_data[country].append(oecd_row)

# Fail with context instead of a bare KeyError when the input file is missing
# or does not contain the requested country/series.
if relevant_country not in country_data:
    raise KeyError(
        "no rows for country %r and series %r in %s"
        % (relevant_country, relevant_data, oecd_path)
    )

# Build one output row [date, value] per row of the relevant country's series.
# The row dated start_date itself is not emitted: output begins with the row
# that follows it (the NBER pairing below relies on this offset).
to_save = []
found = False
for oecd_row in country_data[relevant_country]:
    if found:
        to_save.append([str(oecd_row[_DATE_COL]), oecd_row[_VALUE_COL]])
    elif oecd_row[_DATE_COL] == start_date:
        found = True

# The relevant country already has its own column, so remove its name from
# the list of countries still to be appended.
country_names.remove(relevant_country)

# Index every other country's values by date once, rather than rescanning the
# country's rows for each output row. setdefault keeps the first row seen for
# a date, which is the row a front-to-back scan would have matched.
value_by_date = {}
for country_name in country_names:
    by_date = {}
    for oecd_row in country_data[country_name]:
        by_date.setdefault(oecd_row[_DATE_COL], oecd_row[_VALUE_COL])
    value_by_date[country_name] = by_date

# Append each other country's value for the row's date, or the default when
# that country has no data for the date.
for out_row in to_save:
    date = out_row[0]
    print(date)
    for country_name in country_names:
        out_row.append(value_by_date[country_name].get(date, _MISSING_VALUE))

# Load NBER recession data, used as the neural network's output.
# Per the original author's note: one entry per month, 1 if the US is in a
# recession and 0 if not.
NBER = read_from_csv(nber_path)

# Locate the NBER row for the starting date.
nber_start = None
for index, nber_row in enumerate(NBER):
    if nber_row and nber_row[0] == start_date_NBER:
        nber_start = index
        break

# Insert the recession flag as column 1 of every output row. Rows are paired
# by position: output row k (0-based) takes NBER row nber_start + k + 1,
# because the output starts one row after start_date.
# When the starting date is absent from the NBER data, or the output runs past
# the end of it, an empty cell is written so the columns stay aligned with the
# header instead of shifting or raising IndexError.
# NOTE: a disabled experiment marked a month as recession when the flag three
# rows ahead was 1, so that the model would learn to predict recessions early.
for offset, out_row in enumerate(to_save, start=1):
    flag = ""
    if nber_start is not None:
        position = nber_start + offset
        if position < len(NBER) and len(NBER[position]) > 1:
            flag = NBER[position][1]
    out_row.insert(1, flag)

# Header row: empty label for the date column, the bare country name for the
# recession flag, "<country> OECD" for its indicator value, then one column
# per remaining country.
header = ["", relevant_country, relevant_country + " OECD"] + country_names
to_save.insert(0, header)

save_to_csv(output_path, to_save)