-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlogistic_regression.py
113 lines (94 loc) · 3.83 KB
/
logistic_regression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import numpy as np
import scipy.optimize
DIMENSIONS = 4
def J_binary(w, b, dataset, labels, lambda_, pi, SVM=False):
norm_term = lambda_ / 2 * (np.linalg.norm(w) ** 2)
sum_term_positive = 0
sum_term_negative = 0
for i in range(dataset.shape[1]):
if labels[i] == 1:
sum_term_positive += np.log1p(np.exp(-np.dot(w.T, dataset[:, i]) - b))
else:
sum_term_negative += np.log1p(np.exp(np.dot(w.T, dataset[:, i]) + b))
if not SVM:
j = (
norm_term
+ (pi / dataset[:, labels == 1].shape[1]) * sum_term_positive
+ ((1 - pi) / dataset[:, labels == 0].shape[1]) * sum_term_negative
)
else:
j = (
norm_term
+ (pi / dataset[:, labels == 1].shape[1]) * sum_term_positive
+ ((1 - pi) / dataset[:, labels == -1].shape[1]) * sum_term_negative
)
return j
def binary_logreg_obj(v, dataset, labels, l, prior=0.5, SVM=False):
w, b = v[0:-1], v[-1]
j = J_binary(w, b, dataset, labels, l, prior, SVM)
return j
def J_multiclass(w, b, dataset, labels, lambda_, pi, SVM=False):
norm_term = lambda_ / 2 * (np.linalg.norm(w) ** 2)
sum_term = 0
for i in range(dataset.shape[1]):
# Ensure that w and dataset[:, i] have compatible shapes for the dot product operation
w_reshaped = w[:dataset.shape[0]].reshape(-1, 1)
dataset_reshaped = dataset[:, i].reshape(-1, 1)
sum_term += np.log1p(np.exp(-np.dot(w_reshaped.T, dataset_reshaped) - b[labels[i]]))
if not SVM:
j = norm_term + (1 / dataset.shape[1]) * sum_term
else:
j = norm_term + (1 / dataset.shape[1]) * sum_term
return j
def multiclass_logreg_obj(v, dataset, labels, l, prior=0.5, SVM=False):
w, b = v[0:-len(set(labels))], v[-len(set(labels)):]
j = J_multiclass(w, b, dataset, labels, l, prior, SVM)
return j
class LogisticRegression:
def __init__(self) -> None:
self.x_estimated_minimum_position = 0
self.f_objective_value_at_minimum = 0
self.d_info = ""
def train(self, dataset, labels, lambda_, pi=0.5, SVM=False):
(
self.x_estimated_minimum_position,
self.f_objective_value_at_minimum,
self.d_info,
) = scipy.optimize.fmin_l_bfgs_b(
binary_logreg_obj,
np.zeros(dataset.shape[0] + 1),
args=(dataset, labels, lambda_, pi, SVM),
approx_grad=True,
)
def getScores(self, X):
scores = np.dot(self.x_estimated_minimum_position[0:-1], X) + self.x_estimated_minimum_position[-1]
return scores
def predict(self, X):
scores = self.getScores(X)
prediction = (scores > 0).astype(int)
return prediction
class MulticlassLogisticRegression:
def __init__(self) -> None:
self.x_estimated_minimum_position = 0
self.f_objective_value_at_minimum = 0
self.d_info = ""
def train(self, dataset, labels, lambda_, pi=0.5, SVM=False):
(
self.x_estimated_minimum_position,
self.f_objective_value_at_minimum,
self.d_info,
) = scipy.optimize.fmin_l_bfgs_b(
multiclass_logreg_obj,
np.zeros((dataset.shape[0], DIMENSIONS + 1)),
args=(dataset, labels, lambda_, pi, SVM),
approx_grad=True,
)
def getScores(self, X):
# Ensure that x_estimated_minimum_position and X have compatible shapes for the dot product operation
x_estimated_minimum_position_reshaped = self.x_estimated_minimum_position[:X.shape[0]].reshape(-1, 1)
scores = np.dot(x_estimated_minimum_position_reshaped.T, X) + self.x_estimated_minimum_position[-1]
return scores
def predict(self, X):
scores = self.getScores(X)
prediction = np.argmax(scores, axis=0)
return prediction