-
Notifications
You must be signed in to change notification settings - Fork 0
/
handson_rf.py
65 lines (47 loc) · 2.38 KB
/
handson_rf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pickle
import os.path
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import LinearSVC, SVC
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import ExtraTreeClassifier
from sklearn.metrics import accuracy_score
from utils import open_mnist_or_download_if_missing
def to_absolute_indices(parent_idx, relative_idx):
    """Translate positions inside a subset into positions in the full dataset.

    A splitter that is run on ``data.iloc[parent_idx]`` yields positions that
    are relative to that subset. Using them directly on ``data`` selects the
    wrong rows, so they have to be mapped back through ``parent_idx`` first.

    Args:
        parent_idx: Positions (into the full dataset) that make up the subset.
        relative_idx: Positions into the subset itself.

    Returns:
        A NumPy integer array with the full-dataset position of every entry
        in ``relative_idx``, in the same order.
    """
    # An explicit integer dtype keeps an empty selection valid as an index.
    parent = np.asarray(parent_idx, dtype=np.intp)
    relative = np.asarray(relative_idx, dtype=np.intp)
    return parent[relative]


def main():
    """Train several classifiers on MNIST and compare them with voting ensembles.

    The data is split into train / cross-validation / test parts, each base
    classifier is fitted and scored on the cross-validation part, and then a
    VotingClassifier is scored with hard and with soft voting.
    """
    random_state = 65
    mnist = open_mnist_or_download_if_missing()

    # Outer split: 60000 rows for train+CV, the remainder is held out as test.
    train_cv_idx, test_idx = next(
        ShuffleSplit(n_splits=1, train_size=60000, random_state=random_state).split(
            mnist.data, mnist.target))

    # Inner split: 50000 of those rows for training, the rest for CV.
    # The positions returned here are relative to the train+CV subset.
    train_rel, cv_rel = next(
        ShuffleSplit(n_splits=1, train_size=50000, random_state=random_state).split(
            mnist.data.iloc[train_cv_idx], mnist.target.iloc[train_cv_idx]))

    # Map back to full-dataset positions; indexing mnist.data with the
    # relative positions would pick unrelated rows and leak test samples
    # into the train / CV sets.
    train_idx = to_absolute_indices(train_cv_idx, train_rel)
    cv_idx = to_absolute_indices(train_cv_idx, cv_rel)

    X_train = mnist.data.iloc[train_idx]
    y_train = mnist.target.iloc[train_idx]
    X_cv = mnist.data.iloc[cv_idx]
    y_cv = mnist.target.iloc[cv_idx]
    # The test split is prepared here but not evaluated by this script.
    X_test = mnist.data.iloc[test_idx]
    y_test = mnist.target.iloc[test_idx]

    linear_svc_clf = LinearSVC(max_iter=100, random_state=random_state)
    rf_clf = RandomForestClassifier(random_state=random_state)
    lr_clf = LogisticRegression(random_state=random_state)
    extra_tree_clf = ExtraTreeClassifier(random_state=random_state)

    classifiers = [linear_svc_clf, rf_clf, lr_clf, extra_tree_clf]
    for clf in classifiers:
        print(f"Training {clf}")
        clf.fit(X_train, y_train)

    print(f"Classifiers:{classifiers}")
    print(f"Scores: {[clf.score(X_cv, y_cv) for clf in classifiers]}")

    # LinearSVC is not part of the ensemble -- presumably because it offers
    # no predict_proba, which the soft-voting step below relies on.
    named_estimators = [
        ("random_forest", rf_clf),
        ("logistic_regression", lr_clf),
        ("extra_tree", extra_tree_clf),
    ]
    voting_clf = VotingClassifier(named_estimators)
    voting_clf.fit(X_train, y_train)
    print(f"hard voting classifier score: {voting_clf.score(X_cv, y_cv)}")

    # Inspect the fitted ensemble (debugging aid).
    print(voting_clf.estimators_)
    print(dir(voting_clf))
    print(f"Scores: {[clf.score(X_cv, y_cv) for clf in voting_clf.estimators_]}")

    # Switch the already fitted ensemble to soft voting without refitting.
    voting_clf.voting = "soft"
    print(f"soft voting classifier score: {voting_clf.score(X_cv, y_cv)}")
    print(f"Scores: {[clf.score(X_cv, y_cv) for clf in voting_clf.estimators_]}")


if __name__ == "__main__":
    main()