-
Notifications
You must be signed in to change notification settings - Fork 0
/
linearRegression.py
66 lines (49 loc) · 1.67 KB
/
linearRegression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Packages Used
import numpy as np
import pandas as pd
import pickle as pk
from sklearn import linear_model, model_selection
from matplotlib import style, pyplot
# Load the student dataset; the file uses ';' as its field separator.
data = pd.read_csv('student-mat.csv', sep=";")

# Keep only the columns used for training and for the plot further down.
data = data[['G1', 'G2', 'G3', 'age', 'studytime', 'absences']]

# Column the model is trained to predict.
predict = 'G3'

# X holds every selected column except the target; Y holds the target only.
# `columns=` replaces the positional axis argument (`drop([predict], 1)`),
# which pandas deprecated in 1.3 and no longer accepts from 2.0 onwards.
X = np.array(data.drop(columns=[predict]))
Y = np.array(data[predict])
# Number of train/score rounds to run, read from the console.
iteration = int(input("Enter how may times you want to test the data:"))

# Results File name.  Bound before the loop so the name exists even when the
# loop body never executes (iteration == 0).
resultFileName = "resultLinearRegression.pickle"

# Best score seen so far.  LinearRegression.score returns R^2, which can be
# negative, so start below every possible score: the first round's model is
# then always saved instead of being skipped when its score is <= 0.
currentScore = float("-inf")

# Initial split so xTrain/xTest/yTrain/yTest exist even if no round is run.
xTrain, xTest, yTrain, yTest = model_selection.train_test_split(X, Y, test_size=0.1)

for _ in range(iteration):
    # Draw a fresh random 90/10 split each round.  Re-using one fixed split
    # would fit and score the exact same data every time, making the rounds
    # (and the "keep the best model" logic below) pointless.
    xTrain, xTest, yTrain, yTest = model_selection.train_test_split(X, Y, test_size=0.1)

    # Fit on the training part and score on the held-out part.
    linear = linear_model.LinearRegression()
    linear.fit(xTrain, yTrain)
    accuracy = linear.score(xTest, yTest)
    print(f"curent accuracy: {accuracy}")

    # Persist the model only when it beats the best score so far.
    if accuracy > currentScore:
        currentScore = accuracy
        # Save Model
        with open(resultFileName, 'wb') as file:
            pk.dump(linear, file)

# NOTE: after the loop, xTest/yTest hold the split from the last round, which
# is not necessarily the split the saved (best-scoring) model was scored on.
# Read Model: reload the estimator stored in the results file.
# The context manager closes the file handle, which a bare open() left open.
# NOTE(security): unpickling can execute arbitrary code; only load a results
# file that this script wrote itself.
with open(resultFileName, 'rb') as openFile:
    linear = pk.load(openFile)

# Predict the target for the held-out rows.
prediction = linear.predict(xTest)

# Print, per held-out row: predicted value, input feature row, actual value.
for predicted, features, actual in zip(prediction, xTest, yTest):
    print(predicted, features, actual)
# Column to compare against the final grade in the plot.
dataPoint = 'age'

# Scatter plot with the final grade (G3) on the x-axis and the chosen
# column on the y-axis, drawn with the 'ggplot' style sheet.
style.use('ggplot')
finalGrades = data['G3']
chosenColumn = data[dataPoint]
pyplot.scatter(finalGrades, chosenColumn)

# Axis labels are shown in upper case.
xAxisLabel = 'final grade'.upper()
yAxisLabel = dataPoint.upper()
pyplot.xlabel(xAxisLabel)
pyplot.ylabel(yAxisLabel)

pyplot.show()