-
Notifications
You must be signed in to change notification settings - Fork 17
/
200 firms and 6 macros download.R
197 lines (171 loc) · 7.85 KB
/
200 firms and 6 macros download.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# --------------------------------------------------------
# Project: FRM
#---------------------------------------------------------
# QuantLet : FRM_download_data
#---------------------------------------------------------
# Published in : FRM: Financial Risk Meter
#---------------------------------------------------------
# Description : download the data of 206 variables
# from Yahoo finance and Feferal reserve board.
# --------------------------------------------------------
# Keywords : automatic, VaR, returns, Yahoo Finance,
# Feferal Reserve Board, quantile regression, lasso, risk
#---------------------------------------------------------
# See also :
#---------------------------------------------------------
# Author : Thijs Benschop, Lukas Borke, Lining Yu
#---------------------------------------------------------
# Submitted :
#---------------------------------------------------------
# Datafile : companylist 2015.csv
#----------------------------------------------------------
# clear all variables
rm(list = ls(all = TRUE))
# install and load packages
libraries = c("quantmod")
lapply(libraries, function(x) if (!(x %in% installed.packages())) {
install.packages(x)
})
lapply(libraries, library, quietly = TRUE, character.only = TRUE)
##################################### Part 1 download 200 firms ###################################
# set the working directory
# setwd("//clapton.wiwi.hu-berlin.de/frm/codes")
# for symbols in Yahoo finance
companylist = read.csv("companylist 2015.csv")
# companylist2015 - downloaded from this source
# http://www.nasdaq.com/screening/companies-by-industry.aspx?industry=Finance&sortname=country&sorttype=1
# Array with firm names sorterd by market capitalization
firm_names = as.character(companylist[, 1])
# starting date for 200 firms´
a = "2006-12-29"
# starting date for macro variables, 1 day lag in data retrieval
a_m = "2006-12-28"
# date for 200 firms
b = Sys.Date()
# date for macro variables, 1 day lag
b_m = Sys.Date() - 1
# calls Yahoo Finance through internet
example = getSymbols(firm_names[1], src = "yahoo", from = a, to = b, auto.assign = FALSE)
example_time = as.matrix(example[-1, 1])
time_points = nrow(as.data.frame(example))
# setting parameters for the loop number of firms
n = length(firm_names)
# counter (initial value is 1)
s = 1
# number of firms used
max_num = 200
# creating initial matrix for all firms and all time points
firms_closed_price = matrix(0, time_points, n)
colnames(firms_closed_price) = firm_names
# sometimes data for a company can not be captured (bad internet connection,
# problems with correct name) in this case extend this list the list can vary
# from time to time
bad_list = c("ZIONZ", "SNFCA", "KMPA", "CATYW", "MBFIP", "HAWKB", "JLL")
# Main loop : data from yahoo to firms_closed_price
for (i in 1:n) {
# 1) Check whether contained in bad list, if then skip
if (firm_names[i] %in% bad_list) {
next
}
# 2) check whether the complete time series is available, if not skip and print
# name
prices = getSymbols(firm_names[i], src = "yahoo", from = a, to = b, auto.assign = FALSE)
if (nrow(prices) != time_points) {
# skip this firm if different length of time points
print(firm_names[i])
next
}
# 3) get data and save in firms_closed_price
prices_data = as.data.frame(prices)
firms_closed_price[, i] = as.matrix(prices_data[, 6])
print(s)
# increase counter
s = s + 1
# 4) stop if max_num achieved
if (s > max_num)
break
}
# checking which firms were filled
cs = colSums(firms_closed_price)
# sum over daily observations to check for zero columns number of firms for which
# the sum is larger than 0
length(cs[cs > 0])
# taking submatrix from firms_closed_price with 200 companies
collected_firms = names(cs[cs > 0])
data_firms = firms_closed_price[, collected_firms]
# transform the company_prices_200 into log returns
returns_final = diff(log(data_firms))
rownames(returns_final) = rownames(example_time)
################################## Part 2 download macro variables ################################
macro_names = c("^VIX", "^GSPC", "IYR", "DGS3MO", "DGS10", "DBAA")
# Part 2.1: download VIX, GSPC (S&P500) and IYR (iShares Dow Jones US Real Estate) from yahoo finance
VIX = as.matrix(getSymbols(macro_names[1], src = "yahoo", from = a_m, to = b_m, auto.assign = FALSE)[, 6])
GSPC = as.matrix(getSymbols(macro_names[2], src = "yahoo", from = a_m, to = b_m, auto.assign = FALSE)[, 6])
IYR = as.matrix(getSymbols(macro_names[3], src = "yahoo", from = a_m, to = b_m, auto.assign = FALSE)[, 6])
# data of first three variables
data_ft = as.matrix(cbind(VIX, GSPC, IYR))
# transform GSPC and IYR into log returns without VIX
returns_m = diff(log(data_ft[, -1]))
First_three_macro_in = cbind(data_ft[-1, 1], returns_m)
# remove last row
First_three_macro = First_three_macro_in[-nrow(First_three_macro_in), ]
# Part 2.2: download the other 3 macro from Federal reserve Bank measure the
# length of first three macro variables, so that the last three variables have
# the same length with them.
c = nrow(First_three_macro)
# 3 month Treasury change download 3 month Treasury maturities, from 20150713 can
# not use getSymbol to download from Federal any more, only can use the following
# links.
ThreeMT = as.numeric(as.matrix(read.csv("https://research.stlouisfed.org/fred2/series/DGS3MO/downloaddata/DGS3MO.csv",
na.strings = ".")[, -1]))
True_ThreeMT = na.omit(ThreeMT)
lt = as.matrix(True_ThreeMT)
# set the length of this variable the same as the first three variables, take
# last c values
output_ThreeMT = as.matrix(lt[(length(lt) - c):length(lt), 1])
# calculate the 3 month Treasury change
change_ThreeMT = diff(output_ThreeMT)
# Slope of yield curve download 10 year Treasury maturities
Tenyield = as.numeric(as.matrix(read.csv("https://research.stlouisfed.org/fred2/series/DGS10/downloaddata/DGS10.csv",
na.strings = ".")[, -1]))
True_Tenyield = na.omit(Tenyield)
lyc = as.matrix(True_Tenyield)
output_Tenyield = as.matrix(lyc[(length(lyc) - c):length(lyc), 1])
# calculate Slope of yield curve
slope_yield = as.matrix(output_Tenyield - output_ThreeMT)[-1]
# credit spread download BAA
DayBAA = as.numeric(as.matrix(read.csv("https://research.stlouisfed.org/fred2/series/DBAA/downloaddata/DBAA.csv",
na.strings = ".")[, -1]))
True_DayBAA = na.omit(DayBAA)
lc = as.matrix(True_DayBAA)
output_True_DayBAA = as.matrix(lc[(length(lc) - c):length(lc), 1])
# calculate credit spread
credit_spread = as.matrix(output_True_DayBAA - output_Tenyield)[-1]
# combine all the macro variables
rest_three_macro = cbind(change_ThreeMT, slope_yield, credit_spread)
six_macro = cbind(First_three_macro, rest_three_macro)
# scale variables to [0,1]
scale_macro = six_macro
nnrow = nrow(scale_macro)
nncol = ncol(scale_macro)
m = matrix(0, nnrow, nncol)
for (i in 1:nncol) {
m[, i] = (scale_macro[, i] - min(scale_macro[, i]))/(max(scale_macro[, i]) -
min(scale_macro[, i]))
}
colnames(m) = c("^VIX", "^GSPC", "IYR", "3MTCM", "Yield", "Credit")
####################### Part 3 combine 200 firms and 6 macro variables #########################
firms_data = returns_final
macro_data = m
full_data = cbind(firms_data, macro_data)
full_data = round(full_data, digits = 9)
Date_wf = strptime(as.character(rownames(full_data)), "%Y-%m-%d")
Date_rf = format(Date_wf, "%d/%m/%Y")
Date = as.data.frame(Date_rf)
names(Date) = "Date"
rownames(full_data) = NULL
final_data = cbind(Date, full_data)
# change the working directory to save the data
#setwd("//clapton.wiwi.hu-berlin.de/frm/data")
write.csv(format(final_data, scientific = FALSE), file = paste("200_firms_returns_and_scaled_macro_",
b, ".csv", sep = ""), row.names = FALSE, quote = FALSE)