-
Notifications
You must be signed in to change notification settings - Fork 0
/
states_rangeANDmean.R
96 lines (60 loc) · 3.19 KB
/
states_rangeANDmean.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
library(pollstR)
library(foreign)
library(RCurl)
library(dplyr)
library(ggplot2)
library(reshape2)
library(ggthemes)
# Load the FiveThirtyEight pollster-ratings tables and derive per-poll errors
# for state-level rows.
# https://github.com/fivethirtyeight/data/tree/master/pollster-ratings
silver.polls <- read.delim('https://raw.githubusercontent.com/fivethirtyeight/data/master/pollster-ratings/raw-polls.tsv')
silver.ratings <- read.delim('https://raw.githubusercontent.com/fivethirtyeight/data/master/pollster-ratings/pollster-ratings.tsv')

# Full outer join on pollster name: rows without a match on either side are
# kept. TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
silver.data <- merge(
  silver.polls, silver.ratings,
  by.x = 'pollster', by.y = 'Pollster',
  all = TRUE
)

# Parse the month/day/year strings into Date objects. The *.month columns hold
# a "month/day" label (format "%m/%d"), not just the month.
silver.data$poll.date <- as.Date(silver.data$polldate, '%m/%d/%Y')
silver.data$poll.month <- format(silver.data$poll.date, "%m/%d")
silver.data$election.date <- as.Date(silver.data$electiondate, '%m/%d/%Y')
silver.data$election.month <- format(silver.data$election.date, "%m/%d")

# Keep rows of type 'Pres-G' whose location is not the national 'US' entry.
states.data <- filter(silver.data, type_simple == 'Pres-G' & location != 'US')

# Signed poll error per candidate: actual result minus polled percentage.
states.data$cand1_diff <- states.data$cand1_actual - states.data$cand1_pct
states.data$cand2_diff <- states.data$cand2_actual - states.data$cand2_pct
# Quick inspection: first rows, then the number of rows per location.
head(states.data)
table(states.data$location)

# Row counts per pollster and year for location 'IA'.
# View() opens a data viewer, so it is only called in an interactive session;
# this keeps the script runnable end-to-end with Rscript / R CMD BATCH.
if (interactive()) {
  View(
    states.data %>%
      filter(location == 'IA') %>%
      group_by(pollster, year, location) %>%
      summarize(count = n()) %>%
      arrange(pollster, year)
  )
}
# Wider column set (polled and actual percentages included), kept only for
# the head() inspection below.
states.data <- states.data %>%
  select(
    pollster, year, X538.Grade, location,
    cand1_diff, cand2_diff,
    cand1_pct, cand1_actual, cand2_pct, cand2_actual,
    poll.date
  )
head(states.data)

# Narrow to the identifier columns plus the two error columns that are
# reshaped below.
states.data <- states.data %>%
  select(pollster, year, X538.Grade, location, cand1_diff, cand2_diff, poll.date)

# Long format: one row per poll and candidate, with the error column name in
# `variable` (cand1_diff / cand2_diff) and the error in `value`.
# cand1_actual / cand2_actual are no longer columns of states.data at this
# point, so they must not be listed in id.vars (melt() stops with
# "id variables not found in data" if they are).
states.m <- melt(
  states.data,
  id.vars = c('X538.Grade', 'pollster', 'year', 'location', 'poll.date')
)
head(states.m)

# Poll error against election year, coloured by which candidate's error it is.
# Written with ggplot() because qplot() is deprecated in recent ggplot2.
ggplot(states.m, aes(x = year, y = value, colour = variable)) +
  geom_point()

# Poll error per election year, one panel per candidate, coloured by location.
# The point size is a fixed setting, so it goes outside aes(); inside aes() a
# constant is treated as mapped data and adds a meaningless legend entry.
ggplot(states.m) +
  geom_point(
    aes(x = value, y = factor(year), color = location),
    size = 2
  ) +
  facet_wrap(~variable)
# Row counts per pollster and year for location 'IA', i.e. how many polls sit
# behind each group that is aggregated further down.
# View() opens a data viewer, so it is only called in an interactive session.
if (interactive()) {
  View(
    states.data %>%
      filter(location == 'IA') %>%
      group_by(pollster, year, location) %>%
      summarize(count = n()) %>%
      arrange(pollster, year)
  )
}
# Mean poll error per location and year, separately for each candidate.
# ungroup() drops the grouping that summarize() leaves behind so later verbs
# on these tables are not silently applied per location.
states.avg1 <- states.m %>%
  filter(variable == 'cand1_diff') %>%
  group_by(location, year) %>%
  summarize(value = mean(value)) %>%
  ungroup()

states.avg2 <- states.m %>%
  filter(variable == 'cand2_diff') %>%
  group_by(location, year) %>%
  summarize(value = mean(value)) %>%
  ungroup()

# Spread of candidate-1 poll errors per location and year (max minus min).
# summarize() collapses each group to a single row; mutate() kept every poll
# row and only repeated the group's range on it.
states.range <- states.m %>%
  filter(variable == 'cand1_diff') %>%
  group_by(location, year) %>%
  summarize(range = max(value) - min(value)) %>%
  ungroup()

# Mean candidate-2 error by year, drawn as location labels.
ggplot(states.avg2, aes(x = year, y = value, colour = location, label = location)) +
  geom_text(hjust = 0, vjust = 0)

# Range of candidate-1 errors by year; y is the computed `range` column.
ggplot(states.range, aes(x = year, y = range, colour = location, label = location)) +
  geom_point() +
  geom_text(hjust = 0, vjust = 0)
# Write one plot per location: poll error by election year, faceted by
# location and candidate.
for (i in unique(states.m$location)) {
  state_plot <- ggplot(states.m[states.m$location == i, ]) +
    geom_point(aes(x = value, y = factor(year))) +
    facet_wrap(location ~ variable)

  # ggsave() has no default file name, so one must be supplied. Files go to
  # the working directory as states_<location>.png.
  # NOTE(review): naming scheme and format chosen here; adjust as needed.
  ggsave(filename = paste0('states_', i, '.png'), plot = state_plot)
}
# Single-location view for 'IA': poll error per election year, one panel per
# location/candidate combination. Colours are fixed by hand and the legend is
# hidden because the panel titles already name the candidate column.
states.m[states.m$location == 'IA', ] %>%
  ggplot(aes(x = value, y = factor(year), color = variable)) +
  geom_point() +
  facet_wrap(location ~ variable) +
  scale_color_manual(values = c('blue', 'red')) +
  theme(legend.position = 'none')