-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathPartialROC.R
240 lines (207 loc) · 11 KB
/
PartialROC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#' PartialROC - A function to evaluate output of presence only nich models on continuous model outputs.
#'
#' Noninteractive version. For interactive version refer \link{iPartialROC}
#'
#' Function PartialROC generates the area under the curve values using bootstrap method. PartialROC is a model evaluation tool, used for
#' continuous model outputs as compared to binary model outputs. This method is specially used for model trained using presence only data.
#' For more details refer DOI: 10.1016/j.ecolmodel.2007.11.008
#' Input parameters required for this function are
#' 1. PresenceFile - (test data set) in .csv format, must have 3 columns SpName, Longitude, Latitude.
#' 2. PredictionFile - (Thresholded continuous model output). This file should be in .asc format
#' 3. OmissionVal - Amount of error admissible along the Y-axis, given the requirements and conditions of the study. Value should range
#' between 0 - 1
#' 4. RandomPercent - Occurrence points to be sampled randomly from the test data for bootstrapping.
#' 5. NoOfIteration - Number of iteration for bootstrapping
#' 6. OutputFile - Name of output file. If you specify the path, it will store it there otherwise it will save it in current working
#' directory.
#' OutputFile will have 4 columns, IterationNo, AUC_at_specified_value, AUC_AT_Random, AUC_Ratio. The first row will always have 0 th interation
#' which is the actual Area Under the Curve without bootstrapping. And the rest of the rows contains auc ratio for all the bootstrap.
#' @import raster
#' @import sqldf
#' @param PresenceFile - occurence point file (test data)
#' @param PredictionFile - model prediction in .asc format
#' @param OmissionVal - Amount of error admissible along the Y-axis, given the requirements and conditions of the study. Value should range between 0 - 1
#' @param RandomPercent - Occurrence points to be sampled randomly from the test data for bootstrapping.
#' @param NoOfIteration - Number of iteration for bootstrapping
#' @param OutputFile - Name of output file. If you specify the path, it will store it there otherwise it will save it in current working directory.
#' @examples \dontrun{
#' PartialROC()
#' }
#' @export
PartialROC <- function(PresenceFile=NA, PredictionFile=NA, OmissionVal=NA,
RandomPercent=NA, NoOfIteration=NA, OutputFile=NA)
{
## Generate a matrix with point id in presence and suitablility value for that point
## Matrix format, PointID, Long, Lat, SuitVal
if(is.na(PresenceFile)){
stop("Please specify PresenceFile (occurrence table) or use iPartialROC for interactive version")
}
if(is.na(PredictionFile)){
stop("Please specify PredictionFile (Model Prediction) or use iPartialROC for interactive version")
}
if(is.na(OmissionVal)){
stop("Please specify OmissionVal (value between 0 to 1 [1 - omission)]) or use iPartialROC for interactive version")
}
if(is.na(RandomPercent)){
stop("Please specify RandomPercent (Percentage of random points to draw [1-100]) or use iPartialROC for interactive version")
}
if(is.na(NoOfIteration)){
stop("Please specify NoOfIteration (Number of iteration for bootstrapping) or use iPartialROC for interactive version")
}
if(is.na(OutputFile)){
stop("Please specify OutputFile (output file name) or use iPartialROC for interactive version")
OutputFile = readline("Enter output file name : ")
}
OutMat = matrix(0,nrow=NoOfIteration+1, ncol = 4)
InRast = raster(PredictionFile)
## Currently fixing the number of classes to 100. But later flexibility should be given in the parameter.
InRast = round(InRast * 10)
plot(InRast)
## This function should be called only once outside the loop. This function generates values for x-axis.
## As x-axis is not going to change.
ClassPixels = AreaPredictedPresence(InRast)
Occur = read.table(PresenceFile, header=T, sep =",")
Occur = Occur[,-1]
ExtRast = extract(InRast, Occur)
## Remove all the occurrences in the class NA. As these points are not used in the calibration.
OccurTbl = cbind(Occur, ExtRast)
OccurTbl = OccurTbl[which(is.na(OccurTbl[,3]) == FALSE),]
PointID = seq(1:nrow(OccurTbl))
OccurTbl = cbind(PointID, OccurTbl)
names(OccurTbl)= c("PointID", "Longitude", "Latitude", "ClassID")
# # ## Generate the % points within each class in this table. Write SQL, using sqldf package
# # OccurINClass = sqldf("Select count(*), ClassID from OccurTbl group by ClassID order by ClassID desc")
# # OccurINClass = cbind(OccurINClass, cumsum(OccurINClass[,1]), cumsum(OccurINClass[,1]) / nrow(OccurTbl))
# # names(OccurINClass) = c("OccuCount", "ClassID", "OccuSumBelow", "Percent")
## Now the occurrence table contains the class-id and this table should be used to select points randomly and generate
## AUC for bootstrap values.
for(IterationNo in 0:NoOfIteration)
{
print(paste("Current iteration number is ", IterationNo, sep = ""))
if (IterationNo > 0)
{
ll = sample(nrow(OccurTbl), round(RandomPercent/100 * nrow(OccurTbl)), replace=TRUE)
OccurTbl1 = OccurTbl[ll,]
}
else
{
OccurTbl1 = OccurTbl
}
## Generate the % points within each class in this table. Write SQL, using sqldf package
OccurINClass = sqldf("Select count(*), ClassID from OccurTbl1 group by ClassID order by ClassID desc")
OccurINClass = cbind(OccurINClass, cumsum(OccurINClass[,1]), cumsum(OccurINClass[,1]) / nrow(OccurTbl1))
names(OccurINClass) = c("OccuCount", "ClassID", "OccuSumBelow", "Percent")
#### Raster file will contain all the classes in ClassID column, while occurrences table may not have all the classes.
#### Somehow we have to make the ClassID same as raster file ClassID. This could be done with SQL command update.
#### but update is not working, not sure about the reason. So I am running the loop which is very very slow.
XYTable = GenerateXYTable(ClassPixels,OccurINClass)
plot(XYTable[,2], XYTable[,3])
AreaRow = CalculateAUC(XYTable, OmissionVal, IterationNo)
OutMat[IterationNo+1,] = AreaRow
}
OutMat=data.frame(OutMat)
names(OutMat) = c("IterationNo", paste("AUC_at_Value_", OmissionVal, sep = ""), "AUC_at_0.5", "AUC_ratio")
write.table(OutMat, OutputFile, row.names=F, col.names=T, sep =",")
return(OutMat)
}
AreaPredictedPresence <- function(InRast)
{
### Now calculate proportionate area predicted under each suitability
ClassPixels = freq(InRast)
### Remove the NA pixels from the table.
if (is.na(ClassPixels[dim(ClassPixels)[1],1])== TRUE)
{
ClassPixels = ClassPixels[-dim(ClassPixels)[1],]
}
ClassPixels = ClassPixels[order(nrow(ClassPixels):1),]
TotPixPerClass = cumsum(ClassPixels[,2])
PercentPixels = TotPixPerClass / sum(ClassPixels[,2])
ClassPixels = cbind(ClassPixels, TotPixPerClass, PercentPixels)
ClassPixels = ClassPixels[order(nrow(ClassPixels):1),]
return(ClassPixels)
}
## This function generates the XY coordinate table. Using this table areas is calculated.
GenerateXYTable<-function(ClassPixels, OccurINClass)
{
XYTable = ClassPixels[,c(1,4)]
XYTable = cbind(XYTable,rep(-1,nrow(XYTable)))
# names(XYTable) = c("ClassID", "XCoor", "YCoor")
## Set the previous value for 1-omission, i.e Y-axis as the value of last
## class id in Occurrence table. LAst class id will always smallest
## area predicted presence.
PrevYVal = OccurINClass[1,4]
for (i in nrow(ClassPixels):1)
{
CurClassID = XYTable[i,1]
YVal = OccurINClass[which(OccurINClass[,2]==CurClassID),4]
## print(paste("Length of YVal :",length(YVal), "Current Loop count :", i, "Current value of YVal : ", YVal, sep = " " ))
if (length(YVal) == 0 )
{
XYTable[i,3] = PrevYVal
}
else
{
XYTable[i,3] = YVal
PrevYVal = YVal
}
}
## Add A dummy class id in the XYTable with coordinate as 0,0
XYTable = rbind(XYTable, c(XYTable[nrow(XYTable),1] + 1, 0, 0))
XYTable = as.data.frame(XYTable)
names(XYTable) = c("ClassID", "XCoor", "YCoor")
### Now calculate the area using trapezoid method.
return(XYTable)
}
CalculateAUC <- function(XYTable, OmissionVal, IterationNo)
{
## if OmissionVal is 0, then calculate the complete area under the curve. Otherwise calculate only partial area
if (OmissionVal > 0)
{
PartialXYTable = XYTable[which(XYTable[,3] >= OmissionVal),]
### Here calculate the X, Y coordinate for the parallel line to x-axis depending upon the OmissionVal
### Get the classid which is bigger than the last row of the XYTable and get the XCor and Ycor for that class
### So that slope of the line is calculated and then intersection point between line parallel to x-axis and passing through
### ommissionval on Y-axis is calculated.
PrevXCor = XYTable[which(XYTable[,1]==PartialXYTable[nrow(PartialXYTable),1])+1,2]
PrevYCor = XYTable[which(XYTable[,1]==PartialXYTable[nrow(PartialXYTable),1])+1,3]
XCor1 = PartialXYTable[nrow(PartialXYTable),2]
YCor1 = PartialXYTable[nrow(PartialXYTable),3]
## Calculate the point of intersection of line parallel to x-asiz and this line. Use the equation of line
## in point-slope form y1 = m(x1-x2)+y2
Slope = (YCor1 - PrevYCor) / (XCor1 - PrevXCor)
YCor0 = OmissionVal
XCor0 = (YCor0 - PrevYCor + (Slope * PrevXCor)) / Slope
### Add this coordinate in the PartialXYTable with classid greater than highest class id in the table.
### Actually class-id is not that important now, only the place where we add this xcor0 and ycor0 is important.
### add this as last row in the table
PartialXYTable = rbind(PartialXYTable, c(PartialXYTable[nrow(PartialXYTable),1]+1, XCor0, YCor0))
}
else
{
PartialXYTable = XYTable
} ### if OmissionVal > 0
## Now calculate the area under the curve on this table.
XCor1 = PartialXYTable[nrow(PartialXYTable),2]
YCor1 = PartialXYTable[nrow(PartialXYTable),3]
AUCValue = 0
AUCValueAtRandom = 0
for (i in (nrow(PartialXYTable)-1):1)
{
XCor2 = PartialXYTable[i,2]
YCor2 = PartialXYTable[i,3]
# This is calculating the AUCArea for 2 point trapezoid.
TrapArea = (YCor1 * (abs(XCor2 - XCor1))) + (abs(YCor2 - YCor1) * abs(XCor2 - XCor1)) / 2
AUCValue = AUCValue + TrapArea
# now caluclate the area below 0.5 line.
# Find the slope of line which goes to the point
# Equation of line parallel to Y-axis is X=k and equation of line at 0.5 is y = x
TrapAreaAtRandom = (XCor1 * (abs(XCor2 - XCor1))) + (abs(XCor2 - XCor1) * abs(XCor2 - XCor1)) / 2
AUCValueAtRandom = AUCValueAtRandom + TrapAreaAtRandom
XCor1 = XCor2
YCor1 = YCor2
}
NewRow = c(IterationNo, AUCValue, AUCValueAtRandom, AUCValue/AUCValueAtRandom)
return(NewRow)
}
## PartialROC (PresenceFile, PredictionFile, OmissionVal, RandomPercent, NoOfIteration, OutputFile)
## PartialROC ("Symplocos_pubescens.csv", "Symplocos_pubescens_avg.asc", 0.9, 50, 100, "TestRoc.txt")