-
Notifications
You must be signed in to change notification settings - Fork 26
/
ml_models.py
73 lines (57 loc) · 2.12 KB
/
ml_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# Class that encapsulates training and evaluating the ML models (logistic regression and random forest).
# Author: Albert Sanchez
# May 2018
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
class ML_Models:
    """Train scikit-learn classifiers on a labelled DataFrame and print metrics.

    The 'Up' column is used as the target. The 'Symbol' and 'Date' columns are
    dropped, and every remaining column is passed to the model as a feature.
    """

    def __init__(self):
        # The class keeps no instance state; each public method receives the
        # data it works on as an argument.
        pass

    def _x_y(self, data):
        """
        Split the data into feature columns and the target column.

        :param data: pandas DataFrame with all the data; it must contain the
                     'Up', 'Symbol' and 'Date' columns
        :return: x and y values to give the ML model (x is a new DataFrame
                 without 'Up', 'Symbol' and 'Date'; y is the 'Up' column)
        :raises KeyError: if any of the three columns is missing
        """
        y = data['Up']
        # drop() returns a new frame, so the caller's DataFrame is not modified.
        x = data.drop(['Up', 'Symbol', 'Date'], axis=1)
        return x, y

    def _train_test_set(self, x, y, test_size=0.2):
        """
        Split the features and the target into train and test partitions.

        :param x: x parameter for the model
        :param y: y parameter for the model
        :param test_size: share of the rows held out for testing (default 0.2)
        :return: x_train, x_test, y_train, y_test to give the ML model
        """
        # shuffle=False keeps the original row order, so the test partition is
        # the tail of the data.
        # NOTE(review): presumably rows are ordered by date so that the model
        # is evaluated on the most recent rows - confirm against the caller.
        return train_test_split(x, y, test_size=test_size, shuffle=False)

    def _results(self, y_test, predictions):
        """
        Print the classification report and the confusion matrix.

        :param y_test: y_test column
        :param predictions: predictions for the y_test column
        :return: nothing, just prints the results
        """
        print(classification_report(y_test, predictions))
        print(confusion_matrix(y_test, predictions))

    def _fit_and_report(self, model, data):
        """
        Fit the given model on the train split and print its test metrics.

        Shared pipeline behind the public model methods.

        :param model: unfitted estimator exposing fit(x, y) and predict(x)
        :param data: pandas DataFrame with all the data
        :return: the same model instance, after fitting
        """
        x, y = self._x_y(data)
        x_train, x_test, y_train, y_test = self._train_test_set(x, y)
        model.fit(x_train, y_train)
        predictions = model.predict(x_test)
        self._results(y_test, predictions)
        return model

    def logistic_regression(self, data):
        """
        Train a LogisticRegression model and print its test-set metrics.

        :param data: pandas DataFrame with all the data
        :return: the fitted LogisticRegression instance
        """
        return self._fit_and_report(LogisticRegression(), data)

    def random_forest(self, data, n_estimators=200):
        """
        Train a RandomForestClassifier and print its test-set metrics.

        :param data: pandas DataFrame with all the data
        :param n_estimators: number of trees in the forest (default 200)
        :return: the fitted RandomForestClassifier instance
        """
        forest = RandomForestClassifier(n_estimators=n_estimators)
        return self._fit_and_report(forest, data)