# RandomForest.R (163 lines, 130 loc)
# Load required libraries
library(randomForest)
library(caret)
library(glmnet)
# Status: the library() calls above completed.
cat("Libraries loaded successfully.\n")

# Load the Parkinson's data set from a CSV in the working directory.
scaled_parkinsons_data <- read.csv("scaled_parkinsons_data.csv")
cat("Data loaded successfully.\n")

# Split the table into predictors and the two regression targets.
# Columns are addressed by position: 1, 4, 5 and 6 are kept out of the
# predictor set; 5 and 6 are used as the motor_UPDRS and total_UPDRS targets.
non_predictor_cols <- c(1, 4, 5, 6)
Y_total <- scaled_parkinsons_data[, 6]
Y_motor <- scaled_parkinsons_data[, 5]
X <- scaled_parkinsons_data[, -non_predictor_cols]
cat("Dataset and target variables defined successfully.\n")
# ---- motor_UPDRS: 80/20 train-test split ----
# Seed the RNG first so the partition is reproducible.
set.seed(123)
train_indices_motor <- createDataPartition(Y_motor, p = 0.8, list = FALSE)

# Rows listed in train_indices_motor form the training set; every other
# row goes to the test set.
Y_train_motor <- Y_motor[train_indices_motor]
Y_test_motor <- Y_motor[-train_indices_motor]
X_train_motor <- X[train_indices_motor, ]
X_test_motor <- X[-train_indices_motor, ]
cat("Train-test split completed for motor_UPDRS.\n")
# ---- motor_UPDRS: LASSO-based feature ranking ----
cat("Performing feature selection using LASSO for motor_UPDRS...\n")

# Cross-validated LASSO (alpha = 1) fitted on the training predictors.
lasso_model_motor <- cv.glmnet(
  as.matrix(X_train_motor),
  Y_train_motor,
  alpha = 1
)

# Coefficients at lambda.min; the first row is the intercept, so drop it.
lasso_coef_motor <- coef(lasso_model_motor, s = "lambda.min")[-1, ]

# Keep the predictors with the largest absolute coefficients. head() caps the
# selection at the number of available predictors, whereas indexing with
# [1:12] produces NA indices (and a column-selection error) when there are
# fewer than 12. Ranking is by magnitude only, so predictors with a zero
# coefficient can still be included if fewer than 12 are non-zero.
max_lasso_features_motor <- 12
lasso_selected_indices_motor <- head(
  order(abs(lasso_coef_motor), decreasing = TRUE),
  max_lasso_features_motor
)

# drop = FALSE keeps a data frame even if only one column is selected, which
# the `$<-` assignment below relies on.
X_train_lasso_motor <-
  X_train_motor[, lasso_selected_indices_motor, drop = FALSE]
X_test_lasso_motor <-
  X_test_motor[, lasso_selected_indices_motor, drop = FALSE]

# Report which predictors were kept.
cat("\nSelected features after LASSO for motor_UPDRS:\n")
selected_features_lasso_motor <- colnames(X_train_lasso_motor)
print(selected_features_lasso_motor)

# Attach the response to the training frame so it can be used with a formula.
# The equivalent line for the test frame is left commented out.
X_train_lasso_motor$motor_UPDRS <- Y_train_motor
#X_test_lasso_motor$motor_UPDRS <- Y_test_motor
cat("Feature selection with LASSO for motor_UPDRS completed.\n")
# Candidate mtry values: 1 up to the number of predictors. The training frame
# also holds the target column, hence the "- 1". seq_len() yields an empty
# sequence (instead of c(1, 0) from seq(1, 0)) if there are no predictors.
grid_mtry_motor <- seq_len(ncol(X_train_lasso_motor) - 1)
#grid_ntree <- c(100, 200, 300,400,500,600)
grid_motor <- expand.grid(mtry = grid_mtry_motor)
cat("Hyperparameter grid defined successfully for motor_UPDRS.\n")

# 5-fold cross-validation with per-iteration progress output. This control
# object is reused by both train() calls in the script.
ctrl <- trainControl(method = "cv", number = 5, verboseIter = TRUE)
cat("Training control defined successfully.\n")
# Tune (over grid_motor) and fit a random forest on the LASSO-selected
# motor_UPDRS training frame.
cat("Training Random Forest for motor_UPDRS...\n")
tryCatch({
  rf_motor_lasso <- train(
    motor_UPDRS ~ .,
    data = X_train_lasso_motor,
    method = "rf",
    trControl = ctrl,
    tuneGrid = grid_motor
  )
  # mtry value picked by cross-validation.
  best_mtry_motor_lasso <- rf_motor_lasso$bestTune$mtry
  cat("Training Random Forest for motor_UPDRS completed.\n")
}, error = function(e) {
  print("Error occurred during training Random Forest for motor_UPDRS:")
  print(e)
  # Re-raise after logging: the following steps need rf_motor_lasso, so
  # continuing would either fail with an unrelated "object not found" error
  # or silently reuse a stale model left over from an earlier session.
  stop(e)
})
# ---- motor_UPDRS: evaluate on the test rows and persist the model ----
cat("Making predictions for motor_UPDRS...\n")
predictions_motor_lasso <- predict(
  rf_motor_lasso,
  newdata = X_test_lasso_motor
)

# Prediction errors are computed once and reused for RMSE and MAE.
errors_motor_lasso <- predictions_motor_lasso - Y_test_motor
rmse_motor_lasso <- sqrt(mean(errors_motor_lasso^2))
mae_motor_lasso <- mean(abs(errors_motor_lasso))
# R-squared is taken as the squared correlation of predicted vs observed.
r_squared_motor_lasso <- cor(predictions_motor_lasso, Y_test_motor)^2

# Report the test-set metrics.
cat("\nMotor UPDRS :\n")
cat("RMSE:", rmse_motor_lasso, "\n")
cat("R-squared:", r_squared_motor_lasso, "\n")
cat("MAE:", mae_motor_lasso, "\n\n")

# Write the fitted caret model to disk in the working directory.
saveRDS(rf_motor_lasso, "lasso_rf_model_motor.rds")
cat("\nModel for motor_UPDRS saved.\n\n")
# ---- total_UPDRS: 80/20 train-test split ----
# No seed is set in this section; the partition draws from whatever RNG state
# the preceding steps left behind.
train_indices_total <- createDataPartition(Y_total, p = 0.8, list = FALSE)

# Rows listed in train_indices_total form the training set; every other
# row goes to the test set.
Y_train_total <- Y_total[train_indices_total]
Y_test_total <- Y_total[-train_indices_total]
X_train_total <- X[train_indices_total, ]
X_test_total <- X[-train_indices_total, ]
cat("Train-test split completed for total_UPDRS.\n")
# ---- total_UPDRS: LASSO-based feature ranking ----
cat("Performing feature selection using LASSO for total_UPDRS...\n")

# Cross-validated LASSO (alpha = 1) fitted on the training predictors.
lasso_model_total <- cv.glmnet(
  as.matrix(X_train_total),
  Y_train_total,
  alpha = 1
)

# Coefficients at lambda.min; the first row is the intercept, so drop it.
lasso_coef_total <- coef(lasso_model_total, s = "lambda.min")[-1, ]

# Keep the predictors with the largest absolute coefficients. head() caps the
# selection at the number of available predictors, whereas indexing with
# [1:12] produces NA indices (and a column-selection error) when there are
# fewer than 12. Ranking is by magnitude only, so predictors with a zero
# coefficient can still be included if fewer than 12 are non-zero.
max_lasso_features_total <- 12
lasso_selected_indices_total <- head(
  order(abs(lasso_coef_total), decreasing = TRUE),
  max_lasso_features_total
)

# drop = FALSE keeps a data frame even if only one column is selected, which
# the `$<-` assignment below relies on.
X_train_lasso_total <-
  X_train_total[, lasso_selected_indices_total, drop = FALSE]
X_test_lasso_total <-
  X_test_total[, lasso_selected_indices_total, drop = FALSE]

# Report which predictors were kept.
cat("\nSelected features after LASSO for total_UPDRS:\n")
selected_features_lasso_total <- colnames(X_train_lasso_total)
print(selected_features_lasso_total)

# Attach the response to the training frame so it can be used with a formula.
# The equivalent line for the test frame is left commented out.
X_train_lasso_total$total_UPDRS <- Y_train_total
#X_test_lasso_total$total_UPDRS <- Y_test_total
cat("Feature selection with LASSO for total_UPDRS completed.\n")
# Candidate mtry values: 1 up to the number of predictors. The training frame
# also holds the target column, hence the "- 1". seq_len() yields an empty
# sequence (instead of c(1, 0) from seq(1, 0)) if there are no predictors.
grid_mtry_total <- seq_len(ncol(X_train_lasso_total) - 1)
grid_total <- expand.grid(mtry = grid_mtry_total)
cat("Hyperparameter grid defined successfully for total_UPDRS.\n")
# Tune (over grid_total) and fit a random forest on the LASSO-selected
# total_UPDRS training frame.
cat("Training Random Forest for total_UPDRS...\n")
tryCatch({
  rf_total_lasso <- train(
    total_UPDRS ~ .,
    data = X_train_lasso_total,
    method = "rf",
    trControl = ctrl,
    tuneGrid = grid_total
  )
  # mtry value picked by cross-validation.
  best_mtry_total_lasso <- rf_total_lasso$bestTune$mtry
  cat("Training Random Forest for total_UPDRS completed.\n")
}, error = function(e) {
  print("Error occurred during training Random Forest for total_UPDRS:")
  print(e)
  # Re-raise after logging: the following steps need rf_total_lasso, so
  # continuing would either fail with an unrelated "object not found" error
  # or silently reuse a stale model left over from an earlier session.
  stop(e)
})
# ---- total_UPDRS: evaluate on the test rows and persist the model ----
cat("Making predictions for total_UPDRS...\n")
predictions_total_lasso <- predict(
  rf_total_lasso,
  newdata = X_test_lasso_total
)

# Prediction errors are computed once and reused for RMSE and MAE.
errors_total_lasso <- predictions_total_lasso - Y_test_total
rmse_total_lasso <- sqrt(mean(errors_total_lasso^2))
mae_total_lasso <- mean(abs(errors_total_lasso))
# R-squared is taken as the squared correlation of predicted vs observed.
r_squared_total_lasso <- cor(predictions_total_lasso, Y_test_total)^2

# Report the test-set metrics.
cat("\nTotal UPDRS :\n")
cat("RMSE:", rmse_total_lasso, "\n")
cat("R-squared:", r_squared_total_lasso, "\n")
cat("MAE:", mae_total_lasso, "\n\n")

# Write the fitted caret model to disk in the working directory.
saveRDS(rf_total_lasso, "lasso_rf_model_total.rds")
cat("\nModel for total_UPDRS saved.\n\n")