forked from anthonyjatoba/deep_radiomics
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathradiomics_nsga_svm_features.py
110 lines (85 loc) · 3.88 KB
/
radiomics_nsga_svm_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_validate
from platypus import NSGAII, Problem, Binary, nondominated
from radiomics_all_svm import specificity_loss_func, read_data, get_model
class SVM(Problem):
    """Feature-selection problem evaluated by cross-validation.

    The problem has a single binary decision variable holding one bit per
    column of ``X`` and two objectives, both maximized: the mean
    cross-validated sensitivity (recall) and specificity of the estimator
    returned by ``get_model()`` trained on the selected columns.
    """

    def __init__(self):
        # One decision variable (the column bit mask), two objectives.
        super(SVM, self).__init__(1, 2)
        self.X, self.Y = read_data('radiomics.csv')
        self.types[:] = Binary(self.X.shape[1])
        self.model = get_model()
        self.directions[:] = Problem.MAXIMIZE
        # Only the metrics that feed the objectives are scored; the mapping
        # is built once here instead of on every evaluation.
        self.scoring = {
            'Sensitivity': 'recall',
            'Specificity': make_scorer(specificity_loss_func,
                                       greater_is_better=True),
        }

    def evaluate(self, solution):
        """Score the column subset encoded by ``solution``.

        Writes ``[mean sensitivity, mean specificity]`` over a 3-fold
        cross-validation into ``solution.objectives`` and prints them.
        A solution that selects no column receives ``[0.0, 0.0]``.
        """
        mask = np.asarray(solution.variables[0], dtype=bool)

        if not mask.any():
            # Cross-validating on zero columns cannot yield usable scores,
            # so assign the worst value for both maximized objectives and
            # let the search discard this individual.
            solution.objectives[:] = [0.0, 0.0]
            print(solution.objectives)
            return

        # Selecting the columns
        X = self.X[:, mask]
        results = cross_validate(
            self.model, X, self.Y, scoring=self.scoring, cv=3, n_jobs=3)
        solution.objectives[:] = [
            np.mean(results['test_Sensitivity']),
            np.mean(results['test_Specificity']),
        ]
        print(solution.objectives)
def calculate_relevancy(results, objective_idx, threshold=0, max_variables=10):
    """Plot how often each feature is selected by the qualifying solutions.

    A solution qualifies when its objective at ``objective_idx`` is at least
    ``threshold``. For every feature, the percentage of qualifying solutions
    that select it is computed, and the ``max_variables`` most frequent
    features are shown in a horizontal bar chart.

    Args:
        results: Sequence of solutions. Each one exposes ``objectives``
            (indexable) and ``variables``, whose first entry is a per-feature
            boolean mask. The sequence is left unmodified.
        objective_idx: 0-based index of the objective used for filtering.
            The chart is titled "Sensitivity" for 0 and "Specificity" for
            any other value.
        threshold: Minimum objective value for a solution to be considered.
        max_variables: Maximum number of features shown in the chart.

    Returns:
        None. If no solution qualifies, a warning is issued and nothing is
        plotted.

    Raises:
        ValueError: If 'radiomics.csv' provides fewer feature columns than
            the solutions have decision bits.
    """
    import warnings

    # Filtering gives the same set as "sort descending and take the leading
    # run above the threshold", without reordering the caller's list.
    selected = [s for s in results if s.objectives[objective_idx] >= threshold]
    if not selected:
        # Avoids indexing an empty list and a 0/0 percentage computation.
        warnings.warn(
            'calculate_relevancy: no solution reaches threshold %r for '
            'objective %r; nothing to plot.' % (threshold, objective_idx))
        return

    # Rows are solutions, columns are features; the column sums count the
    # occurrences of each feature among the selected models.
    masks = np.array([s.variables[0] for s in selected], dtype=bool)
    variables_frequency = masks.sum(axis=0) * 100 / len(selected)  # percentage

    # Only the header is needed, so no data rows are parsed.
    # NOTE(review): assumes the CSV column order (minus "class") matches the
    # order of the decision bits -- confirm against read_data.
    header = pd.read_csv('radiomics.csv',
                         usecols=lambda column: column not in ["class"],
                         nrows=0)
    variable_names = header.columns.tolist()
    if len(variable_names) < variables_frequency.size:
        raise ValueError(
            'radiomics.csv has %d feature columns but solutions encode %d '
            'features' % (len(variable_names), variables_frequency.size))

    # Ordering feature names by their percentages (stable for ties).
    ranked = sorted(zip(variable_names, variables_frequency),
                    key=lambda item: item[1], reverse=True)
    top = ranked[:max_variables]
    var_list = [name for name, _ in top]
    relevancy = [percentage for _, percentage in top]
    y_pos = np.arange(len(var_list))

    # Plotting results in a horizontal bar chart
    _, ax = plt.subplots(figsize=(11, 11))
    ax.barh(y_pos, relevancy, align='center')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(var_list)
    ax.invert_yaxis()  # most frequent feature at the top
    ax.set_xlabel('Relevancy')
    if objective_idx == 0:
        ax.set_title('Best Variables - Sensitivity')
    else:
        ax.set_title('Best Variables - Specificity')
    plt.show()
if __name__ == "__main__":
    # Run NSGA-II on the feature-selection problem and keep only the
    # non-dominated solutions of the final population.
    optimizer = NSGAII(SVM(), population_size=30)
    optimizer.run(100)
    pareto_front = nondominated(optimizer.result)

    # Scatter plot of the trade-off between the two objectives.
    fig1 = plt.figure(figsize=[11, 11])
    sensitivities = [sol.objectives[0] for sol in pareto_front]
    specificities = [sol.objectives[1] for sol in pareto_front]
    plt.scatter(sensitivities, specificities)
    plt.xlim([0, 1.1])
    plt.ylim([0, 1.1])
    plt.xlabel("Sensitivity")
    plt.ylabel("Specificity")
    plt.show()

    # Feature relevancy per objective: (objective index, minimum score).
    for objective_idx, min_score in ((0, 0.81), (1, 0.83)):
        calculate_relevancy(pareto_front, objective_idx, min_score, 15)