-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathQ3
106 lines (95 loc) · 4.03 KB
/
Q3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
---
title: "Q3"
author: "Camryn Blawas"
date: "7/22/2020"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(modelr)
library(leaps)
library(bestglm)
library(ggplot2)
library(broom)
library(purrr)
# Load the placement records from PlacementData.csv.
PlacementData <- read_csv('PlacementData.csv')
```
```{r, echo=FALSE}
# From Noah: recode the categorical columns of PlacementData as 0/1 indicators.
# HSE_Specialization has more than two levels in use here, so it is split into
# two dummy columns (Commerce and Arts).
RegressionData <- PlacementData %>%
  mutate(
    JobStatus = ifelse(JobStatus == "Placed", 1, 0),
    SE_BoE = ifelse(SE_BoE == "Central", 1, 0),
    HSE_BoE = ifelse(HSE_BoE == "Central", 1, 0),
    WorkExp = ifelse(WorkExp_1 == "TRUE", 1, 0),
    Gender = ifelse(Gender == "M", 1, 0),
    UG_Specialization = ifelse(UG_Specialization == "Comm&Mgmt", 1, 0),
    HSE_SpecializationD1 = ifelse(HSE_Specialization == "Commerce", 1, 0),
    HSE_SpecializationD2 = ifelse(HSE_Specialization == "Arts", 1, 0),
    MBA_Specialization = ifelse(MBA_Specialization == "Mkt&Fin", 1, 0)
  )

# Keep the 13 predictors plus the Salary response, then drop every row that
# has a missing value in any of those columns.
RegressionDataFinal <- RegressionData %>%
  select(
    Gender, SE_Grade, SE_BoE, HSE_Grade, HSE_BoE,
    HSE_SpecializationD1, HSE_SpecializationD2,
    UG_Grade, UG_Specialization, WorkExp, EmploymentTest,
    MBA_Specialization, MBA_Grade, Salary
  ) %>%
  drop_na()
```
```{r, echo=FALSE}
# Best-subset search for Salary over every column of RegressionDataFinal
# (JobStatus is not included), allowing subsets of up to 13 predictors.
models <- regsubsets(Salary ~ ., data = RegressionDataFinal, nvmax = 13)

# Summarise once, print it, and reuse the stored result below.
res.sum <- summary(models)
res.sum

# Subset size selected by each criterion.
data.frame(
  Adj.R2 = which.max(res.sum$adjr2),
  CP     = which.min(res.sum$cp),
  BIC    = which.min(res.sum$bic),
  RSQ    = which.max(res.sum$rsq),
  RSS    = which.min(res.sum$rss)
)
```
```{r, echo=FALSE}
# Model 1: Salary regressed on every other column of `data`
# (13 predictors when `data` has the columns of RegressionDataFinal).
# `data` is the training set: a data frame, or a modelr `resample` such as the
# `train` entries produced by crossv_kfold(); it is materialised with
# as.data.frame() before fitting.
# Returns the fitted glm (default gaussian family).
Model1 <- function(data) {
  # Fit on the rows that were passed in, not on the global RegressionDataFinal,
  # so each cross-validation fold is trained only on its own training split.
  glm(Salary ~ ., data = as.data.frame(data))
}
# Model 2: originally labelled "12 Variables"; the formula below lists 11
# predictors (HSE_Grade and WorkExp are the two left out).
# NOTE(review): confirm the intended subset against the regsubsets output.
# `data` is the training set: a data frame, or a modelr `resample` such as the
# `train` entries produced by crossv_kfold(); it is materialised with
# as.data.frame() before fitting.
# Returns the fitted glm (default gaussian family).
Model2 <- function(data) {
  # Fit on the rows that were passed in, not on the global RegressionDataFinal,
  # so each cross-validation fold is trained only on its own training split.
  glm(
    Salary ~ Gender + SE_Grade + SE_BoE + HSE_BoE +
      HSE_SpecializationD1 + HSE_SpecializationD2 +
      UG_Grade + UG_Specialization + EmploymentTest +
      MBA_Specialization + MBA_Grade,
    data = as.data.frame(data)
  )
}
# Model 3: six predictors (Gender, HSE_SpecializationD2, UG_Grade,
# UG_Specialization, MBA_Specialization, MBA_Grade).
# `data` is the training set: a data frame, or a modelr `resample` such as the
# `train` entries produced by crossv_kfold(); it is materialised with
# as.data.frame() before fitting.
# Returns the fitted glm (default gaussian family).
Model3 <- function(data) {
  # Fit on the rows that were passed in, not on the global RegressionDataFinal,
  # so each cross-validation fold is trained only on its own training split.
  glm(
    Salary ~ Gender + HSE_SpecializationD2 + UG_Grade + UG_Specialization +
      MBA_Specialization + MBA_Grade,
    data = as.data.frame(data)
  )
}
# Model 4: three predictors (Gender, UG_Specialization, MBA_Grade).
# `data` is the training set: a data frame, or a modelr `resample` such as the
# `train` entries produced by crossv_kfold(); it is materialised with
# as.data.frame() before fitting.
# Returns the fitted glm (default gaussian family).
Model4 <- function(data) {
  # Fit on the rows that were passed in, not on the global RegressionDataFinal,
  # so each cross-validation fold is trained only on its own training split.
  glm(
    Salary ~ Gender + UG_Specialization + MBA_Grade,
    data = as.data.frame(data)
  )
}
# Model 5: two predictors (Gender, MBA_Grade).
# `data` is the training set: a data frame, or a modelr `resample` such as the
# `train` entries produced by crossv_kfold(); it is materialised with
# as.data.frame() before fitting.
# Returns the fitted glm (default gaussian family).
Model5 <- function(data) {
  # Fit on the rows that were passed in, not on the global RegressionDataFinal,
  # so each cross-validation fold is trained only on its own training split.
  glm(Salary ~ Gender + MBA_Grade, data = as.data.frame(data))
}
```
```{r}
#RegressionNames
#Models <- c("Model 1", "Model 2", "Model 3", "Model 4", "Model 5")
#RSquaredValues
#LOGMeanSquaredErrors
#LogMSES <- c(log(mse(Model1, RegressionDataFinal)),log(mse(Model2, RegressionDataFinal)), log(mse(Model3, RegressionDataFinal)), log(mse(Model4, RegressionDataFinal)), log(mse(Model5, RegressionDataFinal)))
#MeanAbsoluteErrors
#MAES <- c((mae(Model1, RegressionDataFinal)),(mae(Model2, RegressionDataFinal)), (mae(Model3, RegressionDataFinal)), (mae(Model4, RegressionDataFinal)), (mae(Model5, RegressionDataFinal)))
#
#create tibble
#Errors <- data.frame(Models, LogMSES, MAES)
#Errors
```
```{r}
# Cross-validation attempt: split the data into 13 folds, call each
# model-fitting function once per fold with that fold's training split, then
# augment each fold's test split with the corresponding model's predictions.
set.seed(100)
cv <- modelr::crossv_kfold(RegressionDataFinal, 13)

# One list-column of fitted models per candidate model.
pred <- dplyr::mutate(
  cv,
  pmodel1 = map(train, Model1),
  pmodel2 = map(train, Model2),
  pmodel3 = map(train, Model3),
  pmodel4 = map(train, Model4),
  pmodel5 = map(train, Model5)
)

# One list-column of augmented test splits (response-scale predictions) per
# candidate model.
pred.value <- dplyr::mutate(
  pred,
  predict1 = map2(
    test, pmodel1,
    function(held_out, fit) {
      augment(fit, newdata = held_out, type.predict = "response")
    }
  ),
  predict2 = map2(
    test, pmodel2,
    function(held_out, fit) {
      augment(fit, newdata = held_out, type.predict = "response")
    }
  ),
  predict3 = map2(
    test, pmodel3,
    function(held_out, fit) {
      augment(fit, newdata = held_out, type.predict = "response")
    }
  ),
  predict4 = map2(
    test, pmodel4,
    function(held_out, fit) {
      augment(fit, newdata = held_out, type.predict = "response")
    }
  ),
  predict5 = map2(
    test, pmodel5,
    function(held_out, fit) {
      augment(fit, newdata = held_out, type.predict = "response")
    }
  )
)
```
```{r}
```
```{r}
# Dot plot of the salaries predicted by the full model (Model1) on the
# complete data. See ?predict.glm for the available prediction types.
# Model1 is a fitting function, so it has to be called to obtain a fitted
# model before predict() can be used on it.
full_fit <- Model1(RegressionDataFinal)
# ggplot() needs a data frame whose column matches the aesthetic, so the
# predictions are stored in a column named `x`.
Predict <- data.frame(x = predict(full_fit, type = "response"))
ggplot(Predict, aes(x)) +
  geom_dotplot()
```