-
Notifications
You must be signed in to change notification settings - Fork 1
/
fig2recreation.r
188 lines (187 loc) · 7.64 KB
/
fig2recreation.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#' ---
#' title: Recreating Figure 2 of Miorini et al.
#' output: html_document
#' ---
#'
#' We are going to be recreating Figure 2 from the 2017 Morini, Raetano, and
#' Everhart paper "Control of white mold of dry bean and residual activity of
#' fungicides applied by chemigation." (DOI:
#' [10.1016.j/cropro.2016.12.023](https://dx.doi.org/10.1016/j.cropro.2016.12.023)).
#'
#' The data are stored in a spreadsheet called "Results_combined-class2019.csv".
#'
#' Preparing Data and packages
#' ===========================
#'
#'
#' We will recreate the figure using dplyr, tidyr, and ggplot2
#+ package_loading, warning = FALSE, message = FALSE
library("dplyr") ## if you do not have these, download them using the "Packages" tab in bottom right
library("tidyr")
library("ggplot2")
#'
#' Now we can read in the data and validate that it is what we expect.
res <- read.csv("ExampleData.csv", head = TRUE, stringsAsFactors = FALSE)
head(res)
dim(res)
#' We can see that we have the columns, DAI, Number, Your.name, Area, and
#' Treatment.
#'
#' Creating simulated data set if we don't have a file to import
#' ---------------------------------------------------------
#'
#' For the moment, I'm going to simulate some data to recreate the
#' figure, so this block of code can be deleted when the data is all filled.
#'
#' For each treatment, we are going to randomly sample from a normal
#' distribution with a mean from 1 to 10. Then, if the treatment is "control",
#' we'll multiply that area by 3 to make sure the effect is large, otherwise
#' each value will grow by 0.5 each day after infection.
#'
#
set.seed(999)
res <- res %>%
group_by(Treatment) %>% # group the rows by Treatment
mutate(Area = rnorm(n(), sample(1:10, 1), 1)) %>% # generate random data
mutate(Area = if ( all(Treatment == "control") ) Area * 3 else Area + DAI*0.5) %>% # manipulate data
ungroup() # remove the grouping
res
#' #'
#' #' Adding in Block information
#' #' ---------------------------
#' #'
#' #'
#' #' Because we will need to transform the data, we will need to include the block
block <- rep(1:12, each = 4)
block
res$Block <- block
res
#'
#' Reordering factors
#' ------------------
#'
#' The factors in the treatments will be out of order when plotting because it's
#' always alphabetical by default (and 1 comes before 2). Here we are reordering
#' the factors:
#'
unique(res$Treatment) # The correct order
res <- mutate(res, Treatment = factor(Treatment, levels = unique(Treatment)))
levels(res$Treatment)
#'
#' Calculationg Percent Disease Control
#' ====================================
#'
#' Percent disease control is "estimated as the difference in lesion area of the
#' control and treatment, divided by lesion area of the control and expressed as
#' percent." Because we will have to make this calculation many times, it's
#' best to write a function for it.
#'
#' Step 1: create a function
#' --------------------------
#'
#' Our function will take in two numbers, the lesion area of the control and
#' the lesion area of the treatment.
percent_control <- function(control, treatment){
res <- (control - treatment)/control # estimate the disease control
return(res*100) # express as percent
}
#'
#' We can show that this works:
percent_control(control = 10, treatment = 5)
#'
#' Step 2: spread the data in different columns by treatment
#' ---------------------------------------------------------
#'
#' Our data were recorded in a "tidy" fashion in which we had one observation
#' per row. In order to calculate the percent control, we'll need to rehshape
#' our data so that we have one column per treatment such that each row will
#' represent a single block per day after application. We'll use the *tidyr*
#' function `spread()` to do this.
blocks <- res %>%
select(DAI, Block, Treatment, Area) %>% # We don't need name or number here
spread(Treatment, Area) # make new columns from treatment, and fill with Area
blocks
#'
#' Step 3: calculate percent control
#' ---------------------------------
#'
#' Now that we have reshaped our data, we can manipulate each treatment column
#' to give us the percent control.
# Note: the backtics "`" allow R to recognize a variable with spaces.
percents <- blocks %>%
mutate(`2.53 mm water` = percent_control(control, `2.53 mm water`)) %>%
mutate(`5.07 mm water` = percent_control(control, `5.07 mm water`)) %>%
mutate(`10.13 mm water` = percent_control(control, `10.13 mm water`)) %>%
mutate(control = percent_control(control, 0))
percents
#' Because figure 2 plotted the average value, we want to summarize our data in
#' averages. To do this, we need to convert our data back to tidy format by
#' using the *tidyr* function `gather()`:
percents <- percents %>%
gather(key = Treatment, value = Area, -DAI, -Block) %>%
mutate(Treatment = factor(Treatment, levels = unique(Treatment))) # reset factor
percents
#'
#' Step 4: summarize the average lesion area
#' -----------------------------------------
#'
#' We can summarize the average area per DAI and Treatment, which will allow us
#' to plot the data in the manner of Morini *et al.* 2017.
avgs <- percents %>%
group_by(DAI, Treatment) %>%
summarize(meanArea = mean(Area)) %>%
ungroup()
avgs
#'
#' Now that we have our averages, we can plot it using *ggplot2*
#+ fig.width = 6, fig.height = 4
ggplot(avgs, aes(x = DAI, y = meanArea, group = Treatment)) +
geom_point(aes(pch = Treatment), size = 3) + # plot the points
stat_smooth(aes(lty = Treatment), method = "lm", se = FALSE, color = "black") + # plot the regression
theme_classic() + # change the appearance
ylim(0, 100) + # set the limits on the y axis
theme(text = element_text(size = 14)) + # increase the text size
labs(list( # Set the labels
x = "Days after fluazinam application",
y = "Percent disease control",
pch = "Irrigation levels",
lty = "Irrigation levels"))
#'
#' Additional visualiztations
#' ==========================
#'
#' When we plot the averages, we inadvertently hide the data. Of course, if we
#' tried to plot all the data in one graph, it would look a bit messy:
#+ fig.width = 6, fig.height = 4
ggplot(percents, aes(x = DAI, y = Area, group = Treatment)) +
geom_point(aes(pch = Treatment), size = 3) + # plot the points
stat_smooth(aes(lty = Treatment), method = "lm", se = FALSE, color = "black") + # plot the regression
theme_classic() + # change the appearance
ylim(0, 100) + # set the limits on the y axis
theme(text = element_text(size = 14)) + # increase the text size
labs(list( # Set the labels
x = "Days after fluazinam application",
y = "Percent disease control",
pch = "Irrigation levels",
lty = "Irrigation levels"))
#'
#' With ggplot2, we can spread the data out into "facets":
#+ fig.width = 8, fig.height = 3
ggplot(percents, aes(x = DAI, y = Area, group = Treatment)) +
geom_point(aes(pch = Treatment), size = 2) + # plot the points
stat_smooth(aes(lty = Treatment), method = "lm", se = FALSE, color = "black") + # plot the regression
theme_classic() + # change the appearance
ylim(0, 100) + # set the limits on the y axis
theme(text = element_text(size = 14)) + # increase the text size
facet_wrap(~Treatment, nrow = 1) +
theme(aspect.ratio = 1) +
labs(list( # Set the labels
x = "Days after fluazinam application",
y = "Percent disease control",
pch = "Irrigation levels",
lty = "Irrigation levels"))
#' Once you have completed the analysis of your leaf area measurements taken by
#' the class. Please upload an image of your screen showing RStudio, with the
#' plot visualized in the lower right-hand corner. Contact me if you need
#' assistance running the data analysis. Here is URL to HW6 to upload results:
#' https://canvas.unl.edu/courses/74116/assignments/641050