forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlocal_weighted_learning.py
135 lines (113 loc) · 4.34 KB
/
local_weighted_learning.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Required imports to run this file
import matplotlib.pyplot as plt
import numpy as np
# weighted matrix
def weighted_matrix(
    point: np.ndarray, training_data_x: np.matrix, bandwidth: float
) -> np.matrix:
    """
    Build the diagonal Gaussian-kernel weight matrix for a single query point.

    Entry ``[j, j]`` is ``exp(-||point - x_j||^2 / (2 * bandwidth^2))`` where
    ``x_j`` is row ``j`` of ``training_data_x``; every off-diagonal entry is 0.

    point           --> the x where we want to make predictions (1-D array or
                        1 x n matrix)
    training_data_x --> training data, one sample per row
    bandwidth       --> tau; it is not a fixed value and can be tuned. A smaller
                        value makes the weights decay faster with distance.

    Returns an m x m ``np.matrix`` where m is the number of training samples.

    >>> weighted_matrix(np.array([1., 1.]), np.matrix([[16.99, 10.34],
    ...     [21.01, 23.68], [24.59, 25.69]]), 0.6)
    matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
    """
    # Coerce to np.matrix so that row indexing yields 1 x n rows and ``*`` is a
    # matrix product, even if the caller hands in a plain 2-D array.
    samples = np.asmatrix(training_data_x)
    # m is the number of training samples
    num_samples = samples.shape[0]
    # Start from the identity; only the diagonal is overwritten below.
    weights = np.asmatrix(np.eye(num_samples))
    denominator = -2.0 * bandwidth**2
    # calculating weights for all training examples [x(i)'s]
    for j in range(num_samples):
        diff = point - samples[j]
        # diff * diff.T is a 1 x 1 matrix; take the scalar out explicitly rather
        # than relying on NumPy's implicit size-1-array-to-scalar conversion
        # (deprecated since NumPy 1.25).
        squared_distance = (diff * diff.T)[0, 0]
        weights[j, j] = np.exp(squared_distance / denominator)
    return weights
def local_weight(
    point: np.ndarray,
    training_data_x: np.matrix,
    training_data_y: np.matrix,
    bandwidth: float,
) -> np.matrix:
    """
    Calculate the local regression coefficients for one query point.

    The weights come from ``weighted_matrix`` and the coefficients solve the
    weighted normal equations ``theta = (X.T W X)^-1 (X.T W y)``.

    point           --> the x where we want to make predictions
    training_data_x --> training data, one sample per row (m x n)
    training_data_y --> targets as a single row (1 x m)
    bandwidth       --> tau, forwarded to ``weighted_matrix``

    Returns the coefficients as an n x 1 ``np.matrix``.

    NOTE(review): ``X.T W X`` is inverted with ``np.matrix.I``; when almost all
    weights underflow to 0 that product is close to singular, so the result can
    be numerically fragile -- confirm the bandwidth suits the data scale.

    >>> local_weight(np.array([1., 1.]), np.matrix([[16.99, 10.34],
    ...     [21.01, 23.68], [24.59, 25.69]]), np.matrix([[1.01, 1.66, 3.5]]), 0.6)
    matrix([[0.00873174],
            [0.08272556]])
    """
    # Coerce so that ``*`` below is always a matrix product and ``.I`` exists.
    features = np.asmatrix(training_data_x)
    targets = np.asmatrix(training_data_y)
    weight = weighted_matrix(point, features, bandwidth)
    # Left- and right-hand side of the weighted normal equations. The grouping
    # of the products is kept exactly as before so results stay bit-identical.
    xtwx = features.T * (weight * features)
    xtwy = features.T * weight * targets.T
    return xtwx.I * xtwy
def local_weight_regression(
    training_data_x: np.matrix, training_data_y: np.matrix, bandwidth: float
) -> np.ndarray:
    """
    Calculate predictions for each data point on axis.

    For every row of ``training_data_x`` a separate set of coefficients is
    fitted with ``local_weight`` (using that row as the query point) and the
    row is multiplied by them to obtain its prediction.

    training_data_x --> training data, one sample per row (m x n)
    training_data_y --> targets as a single row (1 x m)
    bandwidth       --> tau, forwarded to ``local_weight``

    Returns a 1-D ``np.ndarray`` holding one prediction per training row.

    >>> local_weight_regression(np.matrix([[16.99, 10.34], [21.01, 23.68],
    ...     [24.59, 25.69]]), np.matrix([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    # Iterating an np.matrix yields 1 x n row matrices, which is what the
    # matrix product below needs.
    features = np.asmatrix(training_data_x)
    ypred = np.zeros(features.shape[0])
    for i, item in enumerate(features):
        theta = local_weight(item, features, training_data_y, bandwidth)
        # item * theta is a 1 x 1 matrix; store its scalar explicitly because
        # assigning a size-1 array to a scalar slot is deprecated (NumPy 1.25).
        ypred[i] = (item * theta)[0, 0]
    return ypred
def load_data(
    dataset_name: str, cola_name: str, colb_name: str
) -> tuple[np.matrix, np.matrix, np.ndarray, np.ndarray]:
    """
    Load two columns of a seaborn dataset and split them into x and y points.

    dataset_name --> name passed to ``seaborn.load_dataset``
    cola_name    --> column used as the input (x) values
    colb_name    --> column used as the target (y) values

    Returns a 4-tuple ``(training_data_x, mcol_b, col_a, col_b)``:
    training_data_x --> m x 2 matrix: a column of ones followed by column a
    mcol_b          --> column b as a 1 x m matrix
    col_a, col_b    --> the two columns as plain 1-D arrays

    >>> pass  # this function has no doctest
    """
    # seaborn is imported lazily so the rest of the module works without it.
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])
    col_b = np.array(data[colb_name])
    # A 1-D array becomes a 1 x m row matrix.
    mcol_a = np.asmatrix(col_a)
    mcol_b = np.asmatrix(col_b)
    m = np.shape(mcol_b)[1]
    # Column of ones that is stacked in front of the x values.
    one = np.ones((1, m), dtype=int)
    # horizontal stacking
    training_data_x = np.hstack((one.T, mcol_a.T))
    return training_data_x, mcol_b, col_a, col_b
def get_preds(
    training_data_x: np.matrix, mcol_b: np.matrix, tau: float
) -> np.ndarray:
    """
    Get the locally weighted regression prediction for each training sample.

    Thin wrapper that forwards its arguments to ``local_weight_regression``.

    training_data_x --> training data, one sample per row
    mcol_b          --> targets as a single row
    tau             --> bandwidth of the weighting kernel

    >>> get_preds(np.matrix([[16.99, 10.34], [21.01, 23.68],
    ...     [24.59, 25.69]]), np.matrix([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    return local_weight_regression(training_data_x, mcol_b, tau)
def plot_preds(
    training_data_x: np.matrix,
    predictions: np.ndarray,
    col_x: np.ndarray,
    col_y: np.ndarray,
    cola_name: str,
    colb_name: str,
) -> None:
    """
    Plot the raw points and the predicted curve, then display the graph.

    training_data_x --> matrix whose column 1 holds the x values
    predictions     --> one predicted y value per row of training_data_x
    col_x, col_y    --> raw x / y values drawn as a blue scatter plot
    cola_name       --> label for the x axis
    colb_name       --> label for the y axis

    Returns nothing; the figure is shown with ``plt.show()``.

    >>> pass  # this function has no doctest
    """
    # Work on a copy so the caller's matrix is not reordered. sort(axis=0)
    # sorts every column independently; only column 1 is used afterwards.
    xsort = training_data_x.copy()
    xsort.sort(axis=0)
    plt.scatter(col_x, col_y, color="blue")
    plt.plot(
        xsort[:, 1],
        # Reorder the predictions to follow the ascending x values.
        predictions[training_data_x[:, 1].argsort(0)],
        color="yellow",
        linewidth=5,
    )
    plt.title("Local Weighted Regression")
    plt.xlabel(cola_name)
    plt.ylabel(colb_name)
    plt.show()
if __name__ == "__main__":
    # Demo: fit the "tip" column against "total_bill" from seaborn's "tips"
    # dataset with a bandwidth of 0.5 and show the resulting curve.
    dataset, x_column, y_column = "tips", "total_bill", "tip"
    training_data_x, mcol_b, col_a, col_b = load_data(dataset, x_column, y_column)
    predictions = get_preds(training_data_x, mcol_b, 0.5)
    plot_preds(training_data_x, predictions, col_a, col_b, x_column, y_column)