-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataLoader.py
49 lines (33 loc) · 1.58 KB
/
dataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import os
import numpy as np
import pandas as pd
def loadData(pickledDataFile, excelPath, sheetname):
    """Return the sheet's data as a DataFrame, using a pickle file as cache.

    If ``pickledDataFile`` does not exist yet, the sheet ``sheetname`` of the
    workbook at ``excelPath`` is read with its first column as the index,
    sorted by index and written to ``pickledDataFile``. In every case the
    returned frame is the one unpickled from ``pickledDataFile``.

    Args:
        pickledDataFile: Path of the pickle cache file.
        excelPath: Path of the Excel workbook (only read on a cache miss).
        sheetname: Sheet to read from the workbook.

    Returns:
        The DataFrame stored in ``pickledDataFile``.
    """
    if not os.path.isfile(pickledDataFile):
        # The sheet is passed positionally: the keyword was renamed from
        # ``sheetname`` to ``sheet_name`` across pandas releases, while the
        # second positional slot of read_excel is the sheet in both.
        data = pd.read_excel(excelPath, sheetname, index_col=0).sort_index()
        data.to_pickle(pickledDataFile)
    # NOTE(review): unpickling executes arbitrary code; the cache file must
    # only ever come from this function, never from an untrusted source.
    return pd.read_pickle(pickledDataFile)
def priceSeriesToReturnSeries(values):
    """Convert a sequence of prices into log returns.

    Element ``i`` of the result is ``log(values[i + 1] / values[i])``, so the
    result has one element fewer than the input along the first axis.

    Args:
        values: Prices as a numpy array, list, or pandas Series.

    Returns:
        A float numpy array of log returns.
    """
    # Coerce to a plain float array first: lists do not support the slicing
    # arithmetic, object-dtype arrays break np.log, and a pandas Series would
    # align the two slices on their index labels instead of pairing each
    # price with its predecessor.
    prices = np.asarray(values, dtype=float)
    return np.log(prices[1:] / prices[:-1])
def pricesDataFrameToReturnsDataFrame(df):
    """Convert a DataFrame of prices into a DataFrame of log returns.

    Each column is transformed independently: row ``i`` of the result holds
    ``log(price[i + 1] / price[i])`` for that column. The result keeps the
    columns of ``df`` and uses ``df.index[1:]`` as its index, i.e. the first
    row is dropped because it has no preceding price.

    Args:
        df: DataFrame whose columns are price series.

    Returns:
        A float DataFrame of log returns with one row fewer than ``df``.
    """
    # Work on one float matrix instead of filling an empty (object-dtype)
    # frame column by column; this keeps the result numeric and also copes
    # with duplicate column labels.
    prices = np.asarray(df.values, dtype=float)
    returns = np.log(prices[1:] / prices[:-1])
    return pd.DataFrame(returns, index=df.index[1:], columns=df.columns)
def getReturnsDataframe(excelPath, sheetname, rawPath, transformedDataPath):
    """Return the returns DataFrame, building and caching it if necessary.

    When ``transformedDataPath`` is not an existing file, the raw prices are
    obtained through ``loadData(rawPath, excelPath, sheetname)``, converted
    with ``pricesDataFrameToReturnsDataFrame`` and pickled to
    ``transformedDataPath``. The function always returns what is unpickled
    from ``transformedDataPath``.

    Args:
        excelPath: Path of the Excel workbook holding the prices.
        sheetname: Sheet of the workbook to read.
        rawPath: Pickle cache path for the raw prices.
        transformedDataPath: Pickle cache path for the returns.

    Returns:
        The DataFrame stored in ``transformedDataPath``.
    """
    alreadyTransformed = os.path.isfile(transformedDataPath)
    if not alreadyTransformed:
        prices = loadData(rawPath, excelPath, sheetname)
        returns = pricesDataFrameToReturnsDataFrame(prices)
        returns.to_pickle(transformedDataPath)
    # Always serve the pickled copy so both paths return the same thing.
    return pd.read_pickle(transformedDataPath)
def loadVariables():
    """Load the two returns data sets used by this project.

    Both data sets are obtained through ``getReturnsDataframe`` using
    hard-coded workbook, sheet and pickle-cache names. The workbook paths are
    relative, so they resolve against the current working directory.

    Returns:
        A tuple ``(data, index)`` where ``data`` is the transposed value
        matrix of the first returns frame (one row per column of the frame)
        and ``index`` is the value array of the second returns frame with
        length-one axes squeezed out.
    """
    # Returns built from the closing-prices workbook.
    assetReturns = getReturnsDataframe(
        '../TP1MNP_PreciosCierre.xlsx',
        'Precios',
        'datos',
        'datosTransformadosARetornos',
    )
    # Returns built from the spx workbook.
    benchmarkReturns = getReturnsDataframe(
        '../spx.xlsx',
        'Sheet1',
        'indice',
        'indiceTransformadoARetornos',
    )

    assetMatrix = assetReturns.values.T
    benchmarkVector = np.squeeze(benchmarkReturns.values)
    return (assetMatrix, benchmarkVector)